From 242c95aab1085dc5af9a017c00fe78c58386229f Mon Sep 17 00:00:00 2001 From: DeepakRajendrakumaran Date: Fri, 16 Sep 2022 13:58:59 -0700 Subject: [PATCH] Adding zmmStateSupport and AVX512F, AVX512CD, AVX512BW, AVX512DQ and AVX512VL ISAs. (#74113) * Adding avx512StateSupport(). This adds the helper functions to check if avx512 functionality is supported. * Adding AVX512F, AVX512CD, AVX512BW, AVX512DQ and AVX512VL ISAs. * Updating JIT/EE Version Guid. --- src/coreclr/inc/clrconfigvalues.h | 8 + src/coreclr/inc/corinfoinstructionset.h | 252 ++++++++++++--- src/coreclr/inc/jiteeversionguid.h | 10 +- src/coreclr/inc/readytoruninstructionset.h | 8 + src/coreclr/jit/jitconfigvalues.h | 8 + .../nativeaot/Runtime/IntrinsicConstants.h | 8 + src/coreclr/nativeaot/Runtime/PalRedhawk.h | 2 + src/coreclr/nativeaot/Runtime/amd64/GC.asm | 15 + src/coreclr/nativeaot/Runtime/startup.cpp | 42 +++ .../nativeaot/Runtime/unix/PalRedhawkUnix.cpp | 18 ++ .../Runtime/windows/PalRedhawkMinWin.cpp | 25 ++ src/coreclr/pal/src/arch/amd64/processor.cpp | 29 ++ .../Runtime/ReadyToRunInstructionSet.cs | 8 + .../Runtime/ReadyToRunInstructionSetHelper.cs | 32 ++ .../JitInterface/CorInfoInstructionSet.cs | 302 +++++++++++++++--- .../ThunkGenerator/InstructionSetDesc.txt | 26 ++ .../Compiler/HardwareIntrinsicHelpers.Aot.cs | 24 ++ src/coreclr/vm/amd64/AsmHelpers.asm | 16 + src/coreclr/vm/amd64/unixstubs.cpp | 12 + src/coreclr/vm/cgensys.h | 1 + src/coreclr/vm/codeman.cpp | 139 +++++++- src/coreclr/vm/i386/cgenx86.cpp | 37 +++ 22 files changed, 940 insertions(+), 82 deletions(-) diff --git a/src/coreclr/inc/clrconfigvalues.h b/src/coreclr/inc/clrconfigvalues.h index a600138a65dc2..1ce2609af45ef 100644 --- a/src/coreclr/inc/clrconfigvalues.h +++ b/src/coreclr/inc/clrconfigvalues.h @@ -752,6 +752,14 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableHWIntrinsic, W("EnableHWIntrinsic"), 1 RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAES, W("EnableAES"), 1, "Allows AES+ hardware intrinsics to be disabled") 
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX, W("EnableAVX"), 1, "Allows AVX+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX2, W("EnableAVX2"), 1, "Allows AVX2+ hardware intrinsics to be disabled") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512BW, W("EnableAVX512BW"), 1, "Allows AVX512BW+ hardware intrinsics to be disabled") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512BW_VL, W("EnableAVX512BW_VL"), 1, "Allows AVX512BW_VL+ hardware intrinsics to be disabled") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512CD, W("EnableAVX512CD"), 1, "Allows AVX512CD+ hardware intrinsics to be disabled") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512CD_VL, W("EnableAVX512CD_VL"), 1, "Allows AVX512CD_VL+ hardware intrinsics to be disabled") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512DQ, W("EnableAVX512DQ"), 1, "Allows AVX512DQ+ hardware intrinsics to be disabled") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512DQ_VL, W("EnableAVX512DQ_VL"), 1, "Allows AVX512DQ_VL+ hardware intrinsics to be disabled") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512F, W("EnableAVX512F"), 1, "Allows AVX512F+ hardware intrinsics to be disabled") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512F_VL, W("EnableAVX512F_VL"), 1, "Allows AVX512F_VL+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVXVNNI, W("EnableAVXVNNI"), 1, "Allows AVX VNNI+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableBMI1, W("EnableBMI1"), 1, "Allows BMI1+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableBMI2, W("EnableBMI2"), 1, "Allows BMI2+ hardware intrinsics to be disabled") diff --git a/src/coreclr/inc/corinfoinstructionset.h b/src/coreclr/inc/corinfoinstructionset.h index 62c7466b6897d..18bde83aace1a 100644 --- a/src/coreclr/inc/corinfoinstructionset.h +++ b/src/coreclr/inc/corinfoinstructionset.h @@ -60,25 +60,41 @@ enum CORINFO_InstructionSet InstructionSet_AVXVNNI=19, InstructionSet_MOVBE=20, 
InstructionSet_X86Serialize=21, - InstructionSet_X86Base_X64=22, - InstructionSet_SSE_X64=23, - InstructionSet_SSE2_X64=24, - InstructionSet_SSE3_X64=25, - InstructionSet_SSSE3_X64=26, - InstructionSet_SSE41_X64=27, - InstructionSet_SSE42_X64=28, - InstructionSet_AVX_X64=29, - InstructionSet_AVX2_X64=30, - InstructionSet_AES_X64=31, - InstructionSet_BMI1_X64=32, - InstructionSet_BMI2_X64=33, - InstructionSet_FMA_X64=34, - InstructionSet_LZCNT_X64=35, - InstructionSet_PCLMULQDQ_X64=36, - InstructionSet_POPCNT_X64=37, - InstructionSet_AVXVNNI_X64=38, - InstructionSet_MOVBE_X64=39, - InstructionSet_X86Serialize_X64=40, + InstructionSet_AVX512F=22, + InstructionSet_AVX512F_VL=23, + InstructionSet_AVX512BW=24, + InstructionSet_AVX512BW_VL=25, + InstructionSet_AVX512CD=26, + InstructionSet_AVX512CD_VL=27, + InstructionSet_AVX512DQ=28, + InstructionSet_AVX512DQ_VL=29, + InstructionSet_X86Base_X64=30, + InstructionSet_SSE_X64=31, + InstructionSet_SSE2_X64=32, + InstructionSet_SSE3_X64=33, + InstructionSet_SSSE3_X64=34, + InstructionSet_SSE41_X64=35, + InstructionSet_SSE42_X64=36, + InstructionSet_AVX_X64=37, + InstructionSet_AVX2_X64=38, + InstructionSet_AES_X64=39, + InstructionSet_BMI1_X64=40, + InstructionSet_BMI2_X64=41, + InstructionSet_FMA_X64=42, + InstructionSet_LZCNT_X64=43, + InstructionSet_PCLMULQDQ_X64=44, + InstructionSet_POPCNT_X64=45, + InstructionSet_AVXVNNI_X64=46, + InstructionSet_MOVBE_X64=47, + InstructionSet_X86Serialize_X64=48, + InstructionSet_AVX512F_X64=49, + InstructionSet_AVX512F_VL_X64=50, + InstructionSet_AVX512BW_X64=51, + InstructionSet_AVX512BW_VL_X64=52, + InstructionSet_AVX512CD_X64=53, + InstructionSet_AVX512CD_VL_X64=54, + InstructionSet_AVX512DQ_X64=55, + InstructionSet_AVX512DQ_VL_X64=56, #endif // TARGET_AMD64 #ifdef TARGET_X86 InstructionSet_X86Base=1, @@ -102,25 +118,41 @@ enum CORINFO_InstructionSet InstructionSet_AVXVNNI=19, InstructionSet_MOVBE=20, InstructionSet_X86Serialize=21, - InstructionSet_X86Base_X64=22, - 
InstructionSet_SSE_X64=23, - InstructionSet_SSE2_X64=24, - InstructionSet_SSE3_X64=25, - InstructionSet_SSSE3_X64=26, - InstructionSet_SSE41_X64=27, - InstructionSet_SSE42_X64=28, - InstructionSet_AVX_X64=29, - InstructionSet_AVX2_X64=30, - InstructionSet_AES_X64=31, - InstructionSet_BMI1_X64=32, - InstructionSet_BMI2_X64=33, - InstructionSet_FMA_X64=34, - InstructionSet_LZCNT_X64=35, - InstructionSet_PCLMULQDQ_X64=36, - InstructionSet_POPCNT_X64=37, - InstructionSet_AVXVNNI_X64=38, - InstructionSet_MOVBE_X64=39, - InstructionSet_X86Serialize_X64=40, + InstructionSet_AVX512F=22, + InstructionSet_AVX512F_VL=23, + InstructionSet_AVX512BW=24, + InstructionSet_AVX512BW_VL=25, + InstructionSet_AVX512CD=26, + InstructionSet_AVX512CD_VL=27, + InstructionSet_AVX512DQ=28, + InstructionSet_AVX512DQ_VL=29, + InstructionSet_X86Base_X64=30, + InstructionSet_SSE_X64=31, + InstructionSet_SSE2_X64=32, + InstructionSet_SSE3_X64=33, + InstructionSet_SSSE3_X64=34, + InstructionSet_SSE41_X64=35, + InstructionSet_SSE42_X64=36, + InstructionSet_AVX_X64=37, + InstructionSet_AVX2_X64=38, + InstructionSet_AES_X64=39, + InstructionSet_BMI1_X64=40, + InstructionSet_BMI2_X64=41, + InstructionSet_FMA_X64=42, + InstructionSet_LZCNT_X64=43, + InstructionSet_PCLMULQDQ_X64=44, + InstructionSet_POPCNT_X64=45, + InstructionSet_AVXVNNI_X64=46, + InstructionSet_MOVBE_X64=47, + InstructionSet_X86Serialize_X64=48, + InstructionSet_AVX512F_X64=49, + InstructionSet_AVX512F_VL_X64=50, + InstructionSet_AVX512BW_X64=51, + InstructionSet_AVX512BW_VL_X64=52, + InstructionSet_AVX512CD_X64=53, + InstructionSet_AVX512CD_VL_X64=54, + InstructionSet_AVX512DQ_X64=55, + InstructionSet_AVX512DQ_VL_X64=56, #endif // TARGET_X86 }; @@ -272,6 +304,22 @@ struct CORINFO_InstructionSetFlags AddInstructionSet(InstructionSet_MOVBE_X64); if (HasInstructionSet(InstructionSet_X86Serialize)) AddInstructionSet(InstructionSet_X86Serialize_X64); + if (HasInstructionSet(InstructionSet_AVX512F)) + 
AddInstructionSet(InstructionSet_AVX512F_X64); + if (HasInstructionSet(InstructionSet_AVX512F_VL)) + AddInstructionSet(InstructionSet_AVX512F_VL_X64); + if (HasInstructionSet(InstructionSet_AVX512BW)) + AddInstructionSet(InstructionSet_AVX512BW_X64); + if (HasInstructionSet(InstructionSet_AVX512BW_VL)) + AddInstructionSet(InstructionSet_AVX512BW_VL_X64); + if (HasInstructionSet(InstructionSet_AVX512CD)) + AddInstructionSet(InstructionSet_AVX512CD_X64); + if (HasInstructionSet(InstructionSet_AVX512CD_VL)) + AddInstructionSet(InstructionSet_AVX512CD_VL_X64); + if (HasInstructionSet(InstructionSet_AVX512DQ)) + AddInstructionSet(InstructionSet_AVX512DQ_X64); + if (HasInstructionSet(InstructionSet_AVX512DQ_VL)) + AddInstructionSet(InstructionSet_AVX512DQ_VL_X64); #endif // TARGET_AMD64 #ifdef TARGET_X86 #endif // TARGET_X86 @@ -420,6 +468,38 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_X86Serialize); if (resultflags.HasInstructionSet(InstructionSet_X86Serialize_X64) && !resultflags.HasInstructionSet(InstructionSet_X86Serialize)) resultflags.RemoveInstructionSet(InstructionSet_X86Serialize_X64); + if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_AVX512F_X64)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512F); + if (resultflags.HasInstructionSet(InstructionSet_AVX512F_X64) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512F_X64); + if (resultflags.HasInstructionSet(InstructionSet_AVX512F_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512F_VL_X64)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512F_VL); + if (resultflags.HasInstructionSet(InstructionSet_AVX512F_VL_X64) && !resultflags.HasInstructionSet(InstructionSet_AVX512F_VL)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512F_VL_X64); + if 
(resultflags.HasInstructionSet(InstructionSet_AVX512BW) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW_X64)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512BW); + if (resultflags.HasInstructionSet(InstructionSet_AVX512BW_X64) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512BW_X64); + if (resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL_X64)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512BW_VL); + if (resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL_X64) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512BW_VL_X64); + if (resultflags.HasInstructionSet(InstructionSet_AVX512CD) && !resultflags.HasInstructionSet(InstructionSet_AVX512CD_X64)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512CD); + if (resultflags.HasInstructionSet(InstructionSet_AVX512CD_X64) && !resultflags.HasInstructionSet(InstructionSet_AVX512CD)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512CD_X64); + if (resultflags.HasInstructionSet(InstructionSet_AVX512CD_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512CD_VL_X64)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512CD_VL); + if (resultflags.HasInstructionSet(InstructionSet_AVX512CD_VL_X64) && !resultflags.HasInstructionSet(InstructionSet_AVX512CD_VL)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512CD_VL_X64); + if (resultflags.HasInstructionSet(InstructionSet_AVX512DQ) && !resultflags.HasInstructionSet(InstructionSet_AVX512DQ_X64)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512DQ); + if (resultflags.HasInstructionSet(InstructionSet_AVX512DQ_X64) && !resultflags.HasInstructionSet(InstructionSet_AVX512DQ)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512DQ_X64); + if (resultflags.HasInstructionSet(InstructionSet_AVX512DQ_VL) && 
!resultflags.HasInstructionSet(InstructionSet_AVX512DQ_VL_X64)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512DQ_VL); + if (resultflags.HasInstructionSet(InstructionSet_AVX512DQ_VL_X64) && !resultflags.HasInstructionSet(InstructionSet_AVX512DQ_VL)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512DQ_VL_X64); if (resultflags.HasInstructionSet(InstructionSet_SSE) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) resultflags.RemoveInstructionSet(InstructionSet_SSE); if (resultflags.HasInstructionSet(InstructionSet_SSE2) && !resultflags.HasInstructionSet(InstructionSet_SSE)) @@ -460,6 +540,22 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_MOVBE); if (resultflags.HasInstructionSet(InstructionSet_X86Serialize) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) resultflags.RemoveInstructionSet(InstructionSet_X86Serialize); + if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512F); + if (resultflags.HasInstructionSet(InstructionSet_AVX512F_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512F_VL); + if (resultflags.HasInstructionSet(InstructionSet_AVX512CD) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512CD); + if (resultflags.HasInstructionSet(InstructionSet_AVX512CD_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512F_VL)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512CD_VL); + if (resultflags.HasInstructionSet(InstructionSet_AVX512BW) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512BW); + if (resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512F_VL)) + 
resultflags.RemoveInstructionSet(InstructionSet_AVX512BW_VL); + if (resultflags.HasInstructionSet(InstructionSet_AVX512DQ) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512DQ); + if (resultflags.HasInstructionSet(InstructionSet_AVX512DQ_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512F_VL)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512DQ_VL); #endif // TARGET_AMD64 #ifdef TARGET_X86 if (resultflags.HasInstructionSet(InstructionSet_SSE) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) @@ -502,6 +598,22 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_MOVBE); if (resultflags.HasInstructionSet(InstructionSet_X86Serialize) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) resultflags.RemoveInstructionSet(InstructionSet_X86Serialize); + if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512F); + if (resultflags.HasInstructionSet(InstructionSet_AVX512F_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512F_VL); + if (resultflags.HasInstructionSet(InstructionSet_AVX512CD) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512CD); + if (resultflags.HasInstructionSet(InstructionSet_AVX512CD_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512F_VL)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512CD_VL); + if (resultflags.HasInstructionSet(InstructionSet_AVX512BW) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512BW); + if (resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512F_VL)) + 
resultflags.RemoveInstructionSet(InstructionSet_AVX512BW_VL); + if (resultflags.HasInstructionSet(InstructionSet_AVX512DQ) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512DQ); + if (resultflags.HasInstructionSet(InstructionSet_AVX512DQ_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512F_VL)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512DQ_VL); #endif // TARGET_X86 } while (!oldflags.Equals(resultflags)); @@ -642,6 +754,38 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "X86Serialize"; case InstructionSet_X86Serialize_X64 : return "X86Serialize_X64"; + case InstructionSet_AVX512F : + return "AVX512F"; + case InstructionSet_AVX512F_X64 : + return "AVX512F_X64"; + case InstructionSet_AVX512F_VL : + return "AVX512F_VL"; + case InstructionSet_AVX512F_VL_X64 : + return "AVX512F_VL_X64"; + case InstructionSet_AVX512BW : + return "AVX512BW"; + case InstructionSet_AVX512BW_X64 : + return "AVX512BW_X64"; + case InstructionSet_AVX512BW_VL : + return "AVX512BW_VL"; + case InstructionSet_AVX512BW_VL_X64 : + return "AVX512BW_VL_X64"; + case InstructionSet_AVX512CD : + return "AVX512CD"; + case InstructionSet_AVX512CD_X64 : + return "AVX512CD_X64"; + case InstructionSet_AVX512CD_VL : + return "AVX512CD_VL"; + case InstructionSet_AVX512CD_VL_X64 : + return "AVX512CD_VL_X64"; + case InstructionSet_AVX512DQ : + return "AVX512DQ"; + case InstructionSet_AVX512DQ_X64 : + return "AVX512DQ_X64"; + case InstructionSet_AVX512DQ_VL : + return "AVX512DQ_VL"; + case InstructionSet_AVX512DQ_VL_X64 : + return "AVX512DQ_VL_X64"; #endif // TARGET_AMD64 #ifdef TARGET_X86 case InstructionSet_X86Base : @@ -686,6 +830,22 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "MOVBE"; case InstructionSet_X86Serialize : return "X86Serialize"; + case InstructionSet_AVX512F : + return "AVX512F"; + case InstructionSet_AVX512F_VL : + return 
"AVX512F_VL"; + case InstructionSet_AVX512BW : + return "AVX512BW"; + case InstructionSet_AVX512BW_VL : + return "AVX512BW_VL"; + case InstructionSet_AVX512CD : + return "AVX512CD"; + case InstructionSet_AVX512CD_VL : + return "AVX512CD_VL"; + case InstructionSet_AVX512DQ : + return "AVX512DQ"; + case InstructionSet_AVX512DQ_VL : + return "AVX512DQ_VL"; #endif // TARGET_X86 default: @@ -737,6 +897,14 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst case READYTORUN_INSTRUCTION_AvxVnni: return InstructionSet_AVXVNNI; case READYTORUN_INSTRUCTION_Movbe: return InstructionSet_MOVBE; case READYTORUN_INSTRUCTION_X86Serialize: return InstructionSet_X86Serialize; + case READYTORUN_INSTRUCTION_Avx512F: return InstructionSet_AVX512F; + case READYTORUN_INSTRUCTION_Avx512F_VL: return InstructionSet_AVX512F_VL; + case READYTORUN_INSTRUCTION_Avx512BW: return InstructionSet_AVX512BW; + case READYTORUN_INSTRUCTION_Avx512BW_VL: return InstructionSet_AVX512BW_VL; + case READYTORUN_INSTRUCTION_Avx512CD: return InstructionSet_AVX512CD; + case READYTORUN_INSTRUCTION_Avx512CD_VL: return InstructionSet_AVX512CD_VL; + case READYTORUN_INSTRUCTION_Avx512DQ: return InstructionSet_AVX512DQ; + case READYTORUN_INSTRUCTION_Avx512DQ_VL: return InstructionSet_AVX512DQ_VL; #endif // TARGET_AMD64 #ifdef TARGET_X86 case READYTORUN_INSTRUCTION_X86Base: return InstructionSet_X86Base; @@ -758,6 +926,14 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst case READYTORUN_INSTRUCTION_AvxVnni: return InstructionSet_AVXVNNI; case READYTORUN_INSTRUCTION_Movbe: return InstructionSet_MOVBE; case READYTORUN_INSTRUCTION_X86Serialize: return InstructionSet_X86Serialize; + case READYTORUN_INSTRUCTION_Avx512F: return InstructionSet_AVX512F; + case READYTORUN_INSTRUCTION_Avx512F_VL: return InstructionSet_AVX512F_VL; + case READYTORUN_INSTRUCTION_Avx512BW: return InstructionSet_AVX512BW; + case READYTORUN_INSTRUCTION_Avx512BW_VL: return 
InstructionSet_AVX512BW_VL; + case READYTORUN_INSTRUCTION_Avx512CD: return InstructionSet_AVX512CD; + case READYTORUN_INSTRUCTION_Avx512CD_VL: return InstructionSet_AVX512CD_VL; + case READYTORUN_INSTRUCTION_Avx512DQ: return InstructionSet_AVX512DQ; + case READYTORUN_INSTRUCTION_Avx512DQ_VL: return InstructionSet_AVX512DQ_VL; #endif // TARGET_X86 default: diff --git a/src/coreclr/inc/jiteeversionguid.h b/src/coreclr/inc/jiteeversionguid.h index 68d305e4f5d5d..311217aee254b 100644 --- a/src/coreclr/inc/jiteeversionguid.h +++ b/src/coreclr/inc/jiteeversionguid.h @@ -43,11 +43,11 @@ typedef const GUID *LPCGUID; #define GUID_DEFINED #endif // !GUID_DEFINED -constexpr GUID JITEEVersionIdentifier = { /* 0cd8b9d4-04f4-45a7-b16b-7f24b7c0a454 */ - 0x0cd8b9d4, - 0x04f4, - 0x45a7, - {0xb1, 0x6b, 0x7f, 0x24, 0xb7, 0xc0, 0xa4, 0x54} +constexpr GUID JITEEVersionIdentifier = { /* eb8352bd-0a13-4b5b-badb-58f9ecc40c44 */ + 0xeb8352bd, + 0x0a13, + 0x4b5b, + {0xba, 0xdb, 0x58, 0xf9, 0xec, 0xc4, 0x0c, 0x44} }; ////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/coreclr/inc/readytoruninstructionset.h b/src/coreclr/inc/readytoruninstructionset.h index 658d66b806d99..a46f7133c6fda 100644 --- a/src/coreclr/inc/readytoruninstructionset.h +++ b/src/coreclr/inc/readytoruninstructionset.h @@ -37,6 +37,14 @@ enum ReadyToRunInstructionSet READYTORUN_INSTRUCTION_Rcpc=26, READYTORUN_INSTRUCTION_Movbe=27, READYTORUN_INSTRUCTION_X86Serialize=28, + READYTORUN_INSTRUCTION_Avx512F=29, + READYTORUN_INSTRUCTION_Avx512F_VL=30, + READYTORUN_INSTRUCTION_Avx512BW=31, + READYTORUN_INSTRUCTION_Avx512BW_VL=32, + READYTORUN_INSTRUCTION_Avx512CD=33, + READYTORUN_INSTRUCTION_Avx512CD_VL=34, + READYTORUN_INSTRUCTION_Avx512DQ=35, + READYTORUN_INSTRUCTION_Avx512DQ_VL=36, }; diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 771cc6e9cf7da..e209817ea1491 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ 
b/src/coreclr/jit/jitconfigvalues.h @@ -307,6 +307,14 @@ CONFIG_INTEGER(EnableHWIntrinsic, W("EnableHWIntrinsic"), 1) // Allows Base+ h CONFIG_INTEGER(EnableAES, W("EnableAES"), 1) // Allows AES+ hardware intrinsics to be disabled CONFIG_INTEGER(EnableAVX, W("EnableAVX"), 1) // Allows AVX+ hardware intrinsics to be disabled CONFIG_INTEGER(EnableAVX2, W("EnableAVX2"), 1) // Allows AVX2+ hardware intrinsics to be disabled +CONFIG_INTEGER(EnableAVX512BW, W("EnableAVX512BW"), 1) // Allows AVX512BW+ hardware intrinsics to be disabled +CONFIG_INTEGER(EnableAVX512BW_VL, W("EnableAVX512BW_VL"), 1) // Allows AVX512BW+ AVX512VL+ hardware intrinsics to be disabled +CONFIG_INTEGER(EnableAVX512CD, W("EnableAVX512CD"), 1) // Allows AVX512CD+ hardware intrinsics to be disabled +CONFIG_INTEGER(EnableAVX512CD_VL, W("EnableAVX512CD_VL"), 1) // Allows AVX512CD+ AVX512VL+ hardware intrinsics to be disabled +CONFIG_INTEGER(EnableAVX512DQ, W("EnableAVX512DQ"), 1) // Allows AVX512DQ+ hardware intrinsics to be disabled +CONFIG_INTEGER(EnableAVX512DQ_VL, W("EnableAVX512DQ_VL"), 1) // Allows AVX512DQ+ AVX512VL+ hardware intrinsics to be disabled +CONFIG_INTEGER(EnableAVX512F, W("EnableAVX512F"), 1) // Allows AVX512F+ hardware intrinsics to be disabled +CONFIG_INTEGER(EnableAVX512F_VL, W("EnableAVX512F_VL"), 1) // Allows AVX512F+ AVX512VL+ hardware intrinsics to be disabled CONFIG_INTEGER(EnableAVXVNNI, W("EnableAVXVNNI"), 1) // Allows AVX VNNI+ hardware intrinsics to be disabled CONFIG_INTEGER(EnableBMI1, W("EnableBMI1"), 1) // Allows BMI1+ hardware intrinsics to be disabled CONFIG_INTEGER(EnableBMI2, W("EnableBMI2"), 1) // Allows BMI2+ hardware intrinsics to be disabled diff --git a/src/coreclr/nativeaot/Runtime/IntrinsicConstants.h b/src/coreclr/nativeaot/Runtime/IntrinsicConstants.h index f13851f87c12b..e80b42ccb0ed3 100644 --- a/src/coreclr/nativeaot/Runtime/IntrinsicConstants.h +++ b/src/coreclr/nativeaot/Runtime/IntrinsicConstants.h @@ -24,6 +24,14 @@ enum XArchIntrinsicConstants 
XArchIntrinsicConstants_Lzcnt = 0x1000, XArchIntrinsicConstants_AvxVnni = 0x2000, XArchIntrinsicConstants_Movbe = 0x4000, + XArchIntrinsicConstants_Avx512f = 0x8000, + XArchIntrinsicConstants_Avx512f_vl = 0x10000, + XArchIntrinsicConstants_Avx512bw = 0x20000, + XArchIntrinsicConstants_Avx512bw_vl = 0x40000, + XArchIntrinsicConstants_Avx512cd = 0x80000, + XArchIntrinsicConstants_Avx512cd_vl = 0x100000, + XArchIntrinsicConstants_Avx512dq = 0x200000, + XArchIntrinsicConstants_Avx512dq_vl = 0x400000, }; #endif //HOST_X86 || HOST_AMD64 diff --git a/src/coreclr/nativeaot/Runtime/PalRedhawk.h b/src/coreclr/nativeaot/Runtime/PalRedhawk.h index 90578c47f0aa6..1532c4c0c3cfb 100644 --- a/src/coreclr/nativeaot/Runtime/PalRedhawk.h +++ b/src/coreclr/nativeaot/Runtime/PalRedhawk.h @@ -772,7 +772,9 @@ REDHAWK_PALIMPORT void __cpuidex(int cpuInfo[4], int function_id, int subFunctio #endif REDHAWK_PALIMPORT uint32_t REDHAWK_PALAPI xmmYmmStateSupport(); +REDHAWK_PALIMPORT uint32_t REDHAWK_PALAPI avx512StateSupport(); REDHAWK_PALIMPORT bool REDHAWK_PALAPI PalIsAvxEnabled(); +REDHAWK_PALIMPORT bool REDHAWK_PALAPI PalIsAvx512Enabled(); #endif // defined(HOST_X86) || defined(HOST_AMD64) diff --git a/src/coreclr/nativeaot/Runtime/amd64/GC.asm b/src/coreclr/nativeaot/Runtime/amd64/GC.asm index 1dc8a85125371..7c90f90538476 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/GC.asm +++ b/src/coreclr/nativeaot/Runtime/amd64/GC.asm @@ -18,4 +18,19 @@ LEAF_ENTRY xmmYmmStateSupport, _TEXT ret LEAF_END xmmYmmStateSupport, _TEXT +;; extern "C" DWORD __stdcall avx512StateSupport(); +LEAF_ENTRY avx512StateSupport, _TEXT + mov ecx, 0 ; Specify xcr0 + xgetbv ; result in EDX:EAX + and eax, 0E6H + cmp eax, 0E6H ; check OS has enabled XMM, YMM and ZMM state support + jne not_supported + mov eax, 1 + jmp done + not_supported: + mov eax, 0 + done: + ret +LEAF_END avx512StateSupport, _TEXT + end diff --git a/src/coreclr/nativeaot/Runtime/startup.cpp b/src/coreclr/nativeaot/Runtime/startup.cpp index 
62a0f3ba9221c..6ecd662267b6e 100644 --- a/src/coreclr/nativeaot/Runtime/startup.cpp +++ b/src/coreclr/nativeaot/Runtime/startup.cpp @@ -245,6 +245,48 @@ bool DetectCPUFeatures() { g_cpuFeatures |= XArchIntrinsicConstants_AvxVnni; } + + if (PalIsAvx512Enabled() && (avx512StateSupport() == 1)) // XGETBV XCR0[7:5] == 111 + { + if ((cpuidInfo[EBX] & (1 << 16)) != 0) // AVX512F + { + g_cpuFeatures |= XArchIntrinsicConstants_Avx512f; + + bool isAVX512_VLSupported = false; + if ((cpuidInfo[EBX] & (1 << 31)) != 0) // AVX512VL + { + g_cpuFeatures |= XArchIntrinsicConstants_Avx512f_vl; + isAVX512_VLSupported = true; + } + + if ((cpuidInfo[EBX] & (1 << 30)) != 0) // AVX512BW + { + g_cpuFeatures |= XArchIntrinsicConstants_Avx512bw; + if (isAVX512_VLSupported) + { + g_cpuFeatures |= XArchIntrinsicConstants_Avx512bw_vl; + } + } + + if ((cpuidInfo[EBX] & (1 << 28)) != 0) // AVX512CD + { + g_cpuFeatures |= XArchIntrinsicConstants_Avx512cd; + if (isAVX512_VLSupported) + { + g_cpuFeatures |= XArchIntrinsicConstants_Avx512cd_vl; + } + } + + if ((cpuidInfo[EBX] & (1 << 17)) != 0) // AVX512DQ + { + g_cpuFeatures |= XArchIntrinsicConstants_Avx512dq; + if (isAVX512_VLSupported) + { + g_cpuFeatures |= XArchIntrinsicConstants_Avx512dq_vl; + } + } + } + } } } } diff --git a/src/coreclr/nativeaot/Runtime/unix/PalRedhawkUnix.cpp b/src/coreclr/nativeaot/Runtime/unix/PalRedhawkUnix.cpp index bcdbe419d2e07..7b789cbbd509f 100644 --- a/src/coreclr/nativeaot/Runtime/unix/PalRedhawkUnix.cpp +++ b/src/coreclr/nativeaot/Runtime/unix/PalRedhawkUnix.cpp @@ -719,6 +719,11 @@ REDHAWK_PALEXPORT bool REDHAWK_PALAPI PalIsAvxEnabled() return true; } +REDHAWK_PALEXPORT bool REDHAWK_PALAPI PalIsAvx512Enabled() +{ + return true; +} + REDHAWK_PALEXPORT void PalPrintFatalError(const char* message) { // Write the message using lowest-level OS API available. 
This is used to print the stack overflow @@ -1287,6 +1292,19 @@ REDHAWK_PALEXPORT uint32_t REDHAWK_PALAPI xmmYmmStateSupport() // check OS has enabled both XMM and YMM state support return ((eax & 0x06) == 0x06) ? 1 : 0; } + +REDHAWK_PALEXPORT uint32_t REDHAWK_PALAPI avx512StateSupport() +{ + DWORD eax; + __asm(" xgetbv\n" \ + : "=a"(eax) /*output in eax*/\ + : "c"(0) /*inputs - 0 in ecx*/\ + : "edx" /* registers that are clobbered*/ + ); + // check OS has enabled XMM, YMM and ZMM state support + return ((eax & 0xE6) == 0x0E6) ? 1 : 0; +} + #endif // defined(HOST_X86) || defined(HOST_AMD64) #if defined (HOST_ARM64) diff --git a/src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp b/src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp index 20238e87501d5..7b758dee00331 100644 --- a/src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp +++ b/src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp @@ -563,6 +563,31 @@ REDHAWK_PALEXPORT bool REDHAWK_PALAPI PalIsAvxEnabled() return TRUE; } +REDHAWK_PALEXPORT bool REDHAWK_PALAPI PalIsAvx512Enabled() +{ + typedef DWORD64(WINAPI* PGETENABLEDXSTATEFEATURES)(); + PGETENABLEDXSTATEFEATURES pfnGetEnabledXStateFeatures = NULL; + + HMODULE hMod = LoadLibraryExW(L"kernel32", NULL, LOAD_LIBRARY_SEARCH_SYSTEM32); + if (hMod == NULL) + return FALSE; + + pfnGetEnabledXStateFeatures = (PGETENABLEDXSTATEFEATURES)GetProcAddress(hMod, "GetEnabledXStateFeatures"); + + if (pfnGetEnabledXStateFeatures == NULL) + { + return FALSE; + } + + DWORD64 FeatureMask = pfnGetEnabledXStateFeatures(); + if ((FeatureMask & XSTATE_MASK_AVX512) == 0) + { + return FALSE; + } + + return TRUE; +} + REDHAWK_PALEXPORT void* REDHAWK_PALAPI PalAddVectoredExceptionHandler(uint32_t firstHandler, _In_ PVECTORED_EXCEPTION_HANDLER vectoredHandler) { return AddVectoredExceptionHandler(firstHandler, vectoredHandler); diff --git a/src/coreclr/pal/src/arch/amd64/processor.cpp b/src/coreclr/pal/src/arch/amd64/processor.cpp index 
1c9e026cfed9b..0fe9ff7c18344 100644 --- a/src/coreclr/pal/src/arch/amd64/processor.cpp +++ b/src/coreclr/pal/src/arch/amd64/processor.cpp @@ -48,3 +48,32 @@ extern "C" unsigned int XmmYmmStateSupport() // Check OS has enabled both XMM and YMM state support return ((eax & 0x06) == 0x06) ? 1 : 0; } + +/*++ +Function: +Avx512StateSupport + +Check if OS has enabled XMM, YMM and ZMM state support + +Return value: +1 if XMM, YMM and ZMM are enabled, 0 otherwise +--*/ +extern "C" unsigned int Avx512StateSupport() +{ + unsigned int eax; + __asm(" mov $1, %%eax\n" \ + " cpuid\n" \ + " xor %%eax, %%eax\n" \ + " and $0x18000000, %%ecx\n" /* check for xsave feature set and that it is enabled by the OS */ \ + " cmp $0x18000000, %%ecx\n" \ + " jne endz\n" \ + " xor %%ecx, %%ecx\n" \ + " xgetbv\n" \ + "endz:\n" \ + : "=a"(eax) /* output in eax */ \ + : /* no inputs */ \ + : "ebx", "ecx", "edx" /* registers that are clobbered */ + ); + // Check OS has enabled XMM, YMM and ZMM state support + return ((eax & 0x0E6) == 0x0E6) ? 
1 : 0; +} diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs index f7e1bb55a9b79..183c307c12bce 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs @@ -40,6 +40,14 @@ public enum ReadyToRunInstructionSet Rcpc=26, Movbe=27, X86Serialize=28, + Avx512F=29, + Avx512F_VL=30, + Avx512BW=31, + Avx512BW_VL=32, + Avx512CD=33, + Avx512CD_VL=34, + Avx512DQ=35, + Avx512DQ_VL=36, } } diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs index bde3aac7b4160..326ae38a5e713 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs @@ -93,6 +93,22 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.X64_MOVBE_X64: return ReadyToRunInstructionSet.Movbe; case InstructionSet.X64_X86Serialize: return ReadyToRunInstructionSet.X86Serialize; case InstructionSet.X64_X86Serialize_X64: return ReadyToRunInstructionSet.X86Serialize; + case InstructionSet.X64_AVX512F: return ReadyToRunInstructionSet.Avx512F; + case InstructionSet.X64_AVX512F_X64: return ReadyToRunInstructionSet.Avx512F; + case InstructionSet.X64_AVX512F_VL: return ReadyToRunInstructionSet.Avx512F_VL; + case InstructionSet.X64_AVX512F_VL_X64: return ReadyToRunInstructionSet.Avx512F_VL; + case InstructionSet.X64_AVX512BW: return ReadyToRunInstructionSet.Avx512BW; + case InstructionSet.X64_AVX512BW_X64: return ReadyToRunInstructionSet.Avx512BW; + case InstructionSet.X64_AVX512BW_VL: return ReadyToRunInstructionSet.Avx512BW_VL; + case InstructionSet.X64_AVX512BW_VL_X64: return ReadyToRunInstructionSet.Avx512BW_VL; + case InstructionSet.X64_AVX512CD: return 
ReadyToRunInstructionSet.Avx512CD; + case InstructionSet.X64_AVX512CD_X64: return ReadyToRunInstructionSet.Avx512CD; + case InstructionSet.X64_AVX512CD_VL: return ReadyToRunInstructionSet.Avx512CD_VL; + case InstructionSet.X64_AVX512CD_VL_X64: return ReadyToRunInstructionSet.Avx512CD_VL; + case InstructionSet.X64_AVX512DQ: return ReadyToRunInstructionSet.Avx512DQ; + case InstructionSet.X64_AVX512DQ_X64: return ReadyToRunInstructionSet.Avx512DQ; + case InstructionSet.X64_AVX512DQ_VL: return ReadyToRunInstructionSet.Avx512DQ_VL; + case InstructionSet.X64_AVX512DQ_VL_X64: return ReadyToRunInstructionSet.Avx512DQ_VL; default: throw new Exception("Unknown instruction set"); } @@ -142,6 +158,22 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.X86_MOVBE_X64: return null; case InstructionSet.X86_X86Serialize: return ReadyToRunInstructionSet.X86Serialize; case InstructionSet.X86_X86Serialize_X64: return null; + case InstructionSet.X86_AVX512F: return ReadyToRunInstructionSet.Avx512F; + case InstructionSet.X86_AVX512F_X64: return null; + case InstructionSet.X86_AVX512F_VL: return ReadyToRunInstructionSet.Avx512F_VL; + case InstructionSet.X86_AVX512F_VL_X64: return null; + case InstructionSet.X86_AVX512BW: return ReadyToRunInstructionSet.Avx512BW; + case InstructionSet.X86_AVX512BW_X64: return null; + case InstructionSet.X86_AVX512BW_VL: return ReadyToRunInstructionSet.Avx512BW_VL; + case InstructionSet.X86_AVX512BW_VL_X64: return null; + case InstructionSet.X86_AVX512CD: return ReadyToRunInstructionSet.Avx512CD; + case InstructionSet.X86_AVX512CD_X64: return null; + case InstructionSet.X86_AVX512CD_VL: return ReadyToRunInstructionSet.Avx512CD_VL; + case InstructionSet.X86_AVX512CD_VL_X64: return null; + case InstructionSet.X86_AVX512DQ: return ReadyToRunInstructionSet.Avx512DQ; + case InstructionSet.X86_AVX512DQ_X64: return null; + case InstructionSet.X86_AVX512DQ_VL: return ReadyToRunInstructionSet.Avx512DQ_VL; + case 
InstructionSet.X86_AVX512DQ_VL_X64: return null; default: throw new Exception("Unknown instruction set"); } diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs index 78eec3e129759..22d96d02fd593 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs @@ -58,6 +58,14 @@ public enum InstructionSet X64_AVXVNNI = InstructionSet_X64.AVXVNNI, X64_MOVBE = InstructionSet_X64.MOVBE, X64_X86Serialize = InstructionSet_X64.X86Serialize, + X64_AVX512F = InstructionSet_X64.AVX512F, + X64_AVX512F_VL = InstructionSet_X64.AVX512F_VL, + X64_AVX512BW = InstructionSet_X64.AVX512BW, + X64_AVX512BW_VL = InstructionSet_X64.AVX512BW_VL, + X64_AVX512CD = InstructionSet_X64.AVX512CD, + X64_AVX512CD_VL = InstructionSet_X64.AVX512CD_VL, + X64_AVX512DQ = InstructionSet_X64.AVX512DQ, + X64_AVX512DQ_VL = InstructionSet_X64.AVX512DQ_VL, X64_X86Base_X64 = InstructionSet_X64.X86Base_X64, X64_SSE_X64 = InstructionSet_X64.SSE_X64, X64_SSE2_X64 = InstructionSet_X64.SSE2_X64, @@ -77,6 +85,14 @@ public enum InstructionSet X64_AVXVNNI_X64 = InstructionSet_X64.AVXVNNI_X64, X64_MOVBE_X64 = InstructionSet_X64.MOVBE_X64, X64_X86Serialize_X64 = InstructionSet_X64.X86Serialize_X64, + X64_AVX512F_X64 = InstructionSet_X64.AVX512F_X64, + X64_AVX512F_VL_X64 = InstructionSet_X64.AVX512F_VL_X64, + X64_AVX512BW_X64 = InstructionSet_X64.AVX512BW_X64, + X64_AVX512BW_VL_X64 = InstructionSet_X64.AVX512BW_VL_X64, + X64_AVX512CD_X64 = InstructionSet_X64.AVX512CD_X64, + X64_AVX512CD_VL_X64 = InstructionSet_X64.AVX512CD_VL_X64, + X64_AVX512DQ_X64 = InstructionSet_X64.AVX512DQ_X64, + X64_AVX512DQ_VL_X64 = InstructionSet_X64.AVX512DQ_VL_X64, X86_X86Base = InstructionSet_X86.X86Base, X86_SSE = InstructionSet_X86.SSE, X86_SSE2 = InstructionSet_X86.SSE2, @@ -98,6 +114,14 @@ public enum InstructionSet X86_AVXVNNI = InstructionSet_X86.AVXVNNI, X86_MOVBE = 
InstructionSet_X86.MOVBE, X86_X86Serialize = InstructionSet_X86.X86Serialize, + X86_AVX512F = InstructionSet_X86.AVX512F, + X86_AVX512F_VL = InstructionSet_X86.AVX512F_VL, + X86_AVX512BW = InstructionSet_X86.AVX512BW, + X86_AVX512BW_VL = InstructionSet_X86.AVX512BW_VL, + X86_AVX512CD = InstructionSet_X86.AVX512CD, + X86_AVX512CD_VL = InstructionSet_X86.AVX512CD_VL, + X86_AVX512DQ = InstructionSet_X86.AVX512DQ, + X86_AVX512DQ_VL = InstructionSet_X86.AVX512DQ_VL, X86_X86Base_X64 = InstructionSet_X86.X86Base_X64, X86_SSE_X64 = InstructionSet_X86.SSE_X64, X86_SSE2_X64 = InstructionSet_X86.SSE2_X64, @@ -117,6 +141,14 @@ public enum InstructionSet X86_AVXVNNI_X64 = InstructionSet_X86.AVXVNNI_X64, X86_MOVBE_X64 = InstructionSet_X86.MOVBE_X64, X86_X86Serialize_X64 = InstructionSet_X86.X86Serialize_X64, + X86_AVX512F_X64 = InstructionSet_X86.AVX512F_X64, + X86_AVX512F_VL_X64 = InstructionSet_X86.AVX512F_VL_X64, + X86_AVX512BW_X64 = InstructionSet_X86.AVX512BW_X64, + X86_AVX512BW_VL_X64 = InstructionSet_X86.AVX512BW_VL_X64, + X86_AVX512CD_X64 = InstructionSet_X86.AVX512CD_X64, + X86_AVX512CD_VL_X64 = InstructionSet_X86.AVX512CD_VL_X64, + X86_AVX512DQ_X64 = InstructionSet_X86.AVX512DQ_X64, + X86_AVX512DQ_VL_X64 = InstructionSet_X86.AVX512DQ_VL_X64, } public enum InstructionSet_ARM64 { @@ -170,25 +202,41 @@ public enum InstructionSet_X64 AVXVNNI = 19, MOVBE = 20, X86Serialize = 21, - X86Base_X64 = 22, - SSE_X64 = 23, - SSE2_X64 = 24, - SSE3_X64 = 25, - SSSE3_X64 = 26, - SSE41_X64 = 27, - SSE42_X64 = 28, - AVX_X64 = 29, - AVX2_X64 = 30, - AES_X64 = 31, - BMI1_X64 = 32, - BMI2_X64 = 33, - FMA_X64 = 34, - LZCNT_X64 = 35, - PCLMULQDQ_X64 = 36, - POPCNT_X64 = 37, - AVXVNNI_X64 = 38, - MOVBE_X64 = 39, - X86Serialize_X64 = 40, + AVX512F = 22, + AVX512F_VL = 23, + AVX512BW = 24, + AVX512BW_VL = 25, + AVX512CD = 26, + AVX512CD_VL = 27, + AVX512DQ = 28, + AVX512DQ_VL = 29, + X86Base_X64 = 30, + SSE_X64 = 31, + SSE2_X64 = 32, + SSE3_X64 = 33, + SSSE3_X64 = 34, + SSE41_X64 = 35, + 
SSE42_X64 = 36, + AVX_X64 = 37, + AVX2_X64 = 38, + AES_X64 = 39, + BMI1_X64 = 40, + BMI2_X64 = 41, + FMA_X64 = 42, + LZCNT_X64 = 43, + PCLMULQDQ_X64 = 44, + POPCNT_X64 = 45, + AVXVNNI_X64 = 46, + MOVBE_X64 = 47, + X86Serialize_X64 = 48, + AVX512F_X64 = 49, + AVX512F_VL_X64 = 50, + AVX512BW_X64 = 51, + AVX512BW_VL_X64 = 52, + AVX512CD_X64 = 53, + AVX512CD_VL_X64 = 54, + AVX512DQ_X64 = 55, + AVX512DQ_VL_X64 = 56, } public enum InstructionSet_X86 @@ -216,25 +264,41 @@ public enum InstructionSet_X86 AVXVNNI = 19, MOVBE = 20, X86Serialize = 21, - X86Base_X64 = 22, - SSE_X64 = 23, - SSE2_X64 = 24, - SSE3_X64 = 25, - SSSE3_X64 = 26, - SSE41_X64 = 27, - SSE42_X64 = 28, - AVX_X64 = 29, - AVX2_X64 = 30, - AES_X64 = 31, - BMI1_X64 = 32, - BMI2_X64 = 33, - FMA_X64 = 34, - LZCNT_X64 = 35, - PCLMULQDQ_X64 = 36, - POPCNT_X64 = 37, - AVXVNNI_X64 = 38, - MOVBE_X64 = 39, - X86Serialize_X64 = 40, + AVX512F = 22, + AVX512F_VL = 23, + AVX512BW = 24, + AVX512BW_VL = 25, + AVX512CD = 26, + AVX512CD_VL = 27, + AVX512DQ = 28, + AVX512DQ_VL = 29, + X86Base_X64 = 30, + SSE_X64 = 31, + SSE2_X64 = 32, + SSE3_X64 = 33, + SSSE3_X64 = 34, + SSE41_X64 = 35, + SSE42_X64 = 36, + AVX_X64 = 37, + AVX2_X64 = 38, + AES_X64 = 39, + BMI1_X64 = 40, + BMI2_X64 = 41, + FMA_X64 = 42, + LZCNT_X64 = 43, + PCLMULQDQ_X64 = 44, + POPCNT_X64 = 45, + AVXVNNI_X64 = 46, + MOVBE_X64 = 47, + X86Serialize_X64 = 48, + AVX512F_X64 = 49, + AVX512F_VL_X64 = 50, + AVX512BW_X64 = 51, + AVX512BW_VL_X64 = 52, + AVX512CD_X64 = 53, + AVX512CD_VL_X64 = 54, + AVX512DQ_X64 = 55, + AVX512DQ_VL_X64 = 56, } public unsafe struct InstructionSetFlags : IEnumerable @@ -520,6 +584,38 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X64_X86Serialize_X64); if (resultflags.HasInstructionSet(InstructionSet.X64_X86Serialize_X64)) resultflags.AddInstructionSet(InstructionSet.X64_X86Serialize); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F)) + 
resultflags.AddInstructionSet(InstructionSet.X64_AVX512F_X64); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F_X64)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512F); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F_VL)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512F_VL_X64); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F_VL_X64)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512F_VL); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW_X64); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW_X64)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW_VL)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW_VL_X64); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW_VL_X64)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW_VL); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512CD)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512CD_X64); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512CD_X64)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512CD); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512CD_VL)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512CD_VL_X64); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512CD_VL_X64)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512CD_VL); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512DQ)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512DQ_X64); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512DQ_X64)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512DQ); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512DQ_VL)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512DQ_VL_X64); + if 
(resultflags.HasInstructionSet(InstructionSet.X64_AVX512DQ_VL_X64)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512DQ_VL); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE)) resultflags.AddInstructionSet(InstructionSet.X64_X86Base); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE2)) @@ -560,6 +656,22 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X64_SSE42); if (resultflags.HasInstructionSet(InstructionSet.X64_X86Serialize)) resultflags.AddInstructionSet(InstructionSet.X64_X86Base); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX2); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F_VL)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512F); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512CD)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512F); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512CD_VL)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512F_VL); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512F); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW_VL)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512F_VL); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512DQ)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512F); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512DQ_VL)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512F_VL); break; case TargetArchitecture.X86: @@ -603,6 +715,22 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X86_SSE42); if (resultflags.HasInstructionSet(InstructionSet.X86_X86Serialize)) resultflags.AddInstructionSet(InstructionSet.X86_X86Base); + if 
(resultflags.HasInstructionSet(InstructionSet.X86_AVX512F)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX2); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F_VL)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX512F); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512CD)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX512F); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512CD_VL)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX512F_VL); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512BW)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX512F); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512BW_VL)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX512F_VL); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512DQ)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX512F); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512DQ_VL)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX512F_VL); break; } } while (!oldflags.Equals(resultflags)); @@ -701,6 +829,22 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X64_MOVBE); if (resultflags.HasInstructionSet(InstructionSet.X64_X86Serialize_X64)) resultflags.AddInstructionSet(InstructionSet.X64_X86Serialize); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F_X64)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512F); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F_VL_X64)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512F_VL); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW_X64)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW_VL_X64)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW_VL); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512CD_X64)) + 
resultflags.AddInstructionSet(InstructionSet.X64_AVX512CD); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512CD_VL_X64)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512CD_VL); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512DQ_X64)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512DQ); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512DQ_VL_X64)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512DQ_VL); if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base)) resultflags.AddInstructionSet(InstructionSet.X64_SSE); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE)) @@ -741,6 +885,22 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X64_MOVBE); if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base)) resultflags.AddInstructionSet(InstructionSet.X64_X86Serialize); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX2)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512F); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512F_VL); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512CD); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F_VL)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512CD_VL); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F_VL)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW_VL); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512DQ); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F_VL)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512DQ_VL); break; case TargetArchitecture.X86: 
@@ -784,6 +944,22 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X86_MOVBE); if (resultflags.HasInstructionSet(InstructionSet.X86_X86Base)) resultflags.AddInstructionSet(InstructionSet.X86_X86Serialize); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX2)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX512F); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX512F_VL); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX512CD); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F_VL)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX512CD_VL); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX512BW); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F_VL)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX512BW_VL); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX512DQ); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F_VL)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX512DQ_VL); break; } } while (!oldflags.Equals(resultflags)); @@ -801,6 +977,8 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe { ("x86-x64-v3", TargetArchitecture.X86), "x86-x64-v2 avx2 bmi bmi2 lzcnt movbe fma" }, { ("skylake", TargetArchitecture.X64), "x86-x64-v3" }, { ("skylake", TargetArchitecture.X86), "x86-x64-v3" }, + { ("x86-x64-v4", TargetArchitecture.X64), "x86-x64-v3 avx512f avx512f_vl avx512bw avx512bw_vl avx512cd avx512cd_vl" }, + { ("x86-x64-v4", TargetArchitecture.X86), "x86-x64-v3 avx512f avx512f_vl avx512bw avx512bw_vl avx512cd avx512cd_vl" }, { ("armv8-a", TargetArchitecture.ARM64), "neon" }, { ("armv8.1-a", 
TargetArchitecture.ARM64), "armv8-a lse crc rdma" }, { ("armv8.2-a", TargetArchitecture.ARM64), "armv8.1-a" }, @@ -876,6 +1054,14 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return new InstructionSetInfo("avxvnni", "AvxVnni", InstructionSet.X64_AVXVNNI, true); yield return new InstructionSetInfo("movbe", "Movbe", InstructionSet.X64_MOVBE, true); yield return new InstructionSetInfo("serialize", "X86Serialize", InstructionSet.X64_X86Serialize, true); + yield return new InstructionSetInfo("avx512f", "Avx512F", InstructionSet.X64_AVX512F, true); + yield return new InstructionSetInfo("avx512f_vl", "Avx512F_VL", InstructionSet.X64_AVX512F_VL, true); + yield return new InstructionSetInfo("avx512bw", "Avx512BW", InstructionSet.X64_AVX512BW, true); + yield return new InstructionSetInfo("avx512bw_vl", "Avx512BW_VL", InstructionSet.X64_AVX512BW_VL, true); + yield return new InstructionSetInfo("avx512cd", "Avx512CD", InstructionSet.X64_AVX512CD, true); + yield return new InstructionSetInfo("avx512cd_vl", "Avx512CD_VL", InstructionSet.X64_AVX512CD_VL, true); + yield return new InstructionSetInfo("avx512dq", "Avx512DQ", InstructionSet.X64_AVX512DQ, true); + yield return new InstructionSetInfo("avx512dq_vl", "Avx512DQ_VL", InstructionSet.X64_AVX512DQ_VL, true); break; case TargetArchitecture.X86: @@ -900,6 +1086,14 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return new InstructionSetInfo("avxvnni", "AvxVnni", InstructionSet.X86_AVXVNNI, true); yield return new InstructionSetInfo("movbe", "Movbe", InstructionSet.X86_MOVBE, true); yield return new InstructionSetInfo("serialize", "X86Serialize", InstructionSet.X86_X86Serialize, true); + yield return new InstructionSetInfo("avx512f", "Avx512F", InstructionSet.X86_AVX512F, true); + yield return new InstructionSetInfo("avx512f_vl", "Avx512F_VL", InstructionSet.X86_AVX512F_VL, true); + yield return new InstructionSetInfo("avx512bw", "Avx512BW", InstructionSet.X86_AVX512BW, true); + 
yield return new InstructionSetInfo("avx512bw_vl", "Avx512BW_VL", InstructionSet.X86_AVX512BW_VL, true); + yield return new InstructionSetInfo("avx512cd", "Avx512CD", InstructionSet.X86_AVX512CD, true); + yield return new InstructionSetInfo("avx512cd_vl", "Avx512CD_VL", InstructionSet.X86_AVX512CD_VL, true); + yield return new InstructionSetInfo("avx512dq", "Avx512DQ", InstructionSet.X86_AVX512DQ, true); + yield return new InstructionSetInfo("avx512dq_vl", "Avx512DQ_VL", InstructionSet.X86_AVX512DQ_VL, true); break; } } @@ -967,6 +1161,22 @@ public void Set64BitInstructionSetVariants(TargetArchitecture architecture) AddInstructionSet(InstructionSet.X64_MOVBE_X64); if (HasInstructionSet(InstructionSet.X64_X86Serialize)) AddInstructionSet(InstructionSet.X64_X86Serialize_X64); + if (HasInstructionSet(InstructionSet.X64_AVX512F)) + AddInstructionSet(InstructionSet.X64_AVX512F_X64); + if (HasInstructionSet(InstructionSet.X64_AVX512F_VL)) + AddInstructionSet(InstructionSet.X64_AVX512F_VL_X64); + if (HasInstructionSet(InstructionSet.X64_AVX512BW)) + AddInstructionSet(InstructionSet.X64_AVX512BW_X64); + if (HasInstructionSet(InstructionSet.X64_AVX512BW_VL)) + AddInstructionSet(InstructionSet.X64_AVX512BW_VL_X64); + if (HasInstructionSet(InstructionSet.X64_AVX512CD)) + AddInstructionSet(InstructionSet.X64_AVX512CD_X64); + if (HasInstructionSet(InstructionSet.X64_AVX512CD_VL)) + AddInstructionSet(InstructionSet.X64_AVX512CD_VL_X64); + if (HasInstructionSet(InstructionSet.X64_AVX512DQ)) + AddInstructionSet(InstructionSet.X64_AVX512DQ_X64); + if (HasInstructionSet(InstructionSet.X64_AVX512DQ_VL)) + AddInstructionSet(InstructionSet.X64_AVX512DQ_VL_X64); break; case TargetArchitecture.X86: @@ -1010,6 +1220,14 @@ public void Set64BitInstructionSetVariantsUnconditionally(TargetArchitecture arc AddInstructionSet(InstructionSet.X64_AVXVNNI_X64); AddInstructionSet(InstructionSet.X64_MOVBE_X64); AddInstructionSet(InstructionSet.X64_X86Serialize_X64); + 
AddInstructionSet(InstructionSet.X64_AVX512F_X64); + AddInstructionSet(InstructionSet.X64_AVX512F_VL_X64); + AddInstructionSet(InstructionSet.X64_AVX512BW_X64); + AddInstructionSet(InstructionSet.X64_AVX512BW_VL_X64); + AddInstructionSet(InstructionSet.X64_AVX512CD_X64); + AddInstructionSet(InstructionSet.X64_AVX512CD_VL_X64); + AddInstructionSet(InstructionSet.X64_AVX512DQ_X64); + AddInstructionSet(InstructionSet.X64_AVX512DQ_VL_X64); break; case TargetArchitecture.X86: @@ -1032,6 +1250,14 @@ public void Set64BitInstructionSetVariantsUnconditionally(TargetArchitecture arc AddInstructionSet(InstructionSet.X86_AVXVNNI_X64); AddInstructionSet(InstructionSet.X86_MOVBE_X64); AddInstructionSet(InstructionSet.X86_X86Serialize_X64); + AddInstructionSet(InstructionSet.X86_AVX512F_X64); + AddInstructionSet(InstructionSet.X86_AVX512F_VL_X64); + AddInstructionSet(InstructionSet.X86_AVX512BW_X64); + AddInstructionSet(InstructionSet.X86_AVX512BW_VL_X64); + AddInstructionSet(InstructionSet.X86_AVX512CD_X64); + AddInstructionSet(InstructionSet.X86_AVX512CD_VL_X64); + AddInstructionSet(InstructionSet.X86_AVX512DQ_X64); + AddInstructionSet(InstructionSet.X86_AVX512DQ_VL_X64); break; } } diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt index 59321364c42bd..adb3c17f2bb47 100644 --- a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt +++ b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt @@ -46,6 +46,14 @@ instructionset ,X86 , , , ,Vector256, instructionset ,X86 ,AvxVnni , ,25 ,AVXVNNI ,avxvnni instructionset ,X86 ,Movbe , ,27 ,MOVBE ,movbe instructionset ,X86 ,X86Serialize , ,28 ,X86Serialize ,serialize +instructionset ,X86 ,Avx512F , ,29 ,AVX512F ,avx512f +instructionset ,X86 ,Avx512F_VL, ,30 ,AVX512F_VL ,avx512f_vl +instructionset ,X86 ,Avx512BW , ,31 ,AVX512BW ,avx512bw +instructionset ,X86 ,Avx512BW_VL, ,32 
,AVX512BW_VL ,avx512bw_vl +instructionset ,X86 ,Avx512CD , ,33 ,AVX512CD ,avx512cd +instructionset ,X86 ,Avx512CD_VL, ,34 ,AVX512CD_VL ,avx512cd_vl +instructionset ,X86 ,Avx512DQ , ,35 ,AVX512DQ ,avx512dq +instructionset ,X86 ,Avx512DQ_VL, ,36 ,AVX512DQ_VL ,avx512dq_vl instructionset64bit,X86 ,X86Base instructionset64bit,X86 ,SSE @@ -66,6 +74,14 @@ instructionset64bit,X86 ,POPCNT instructionset64bit,X86 ,AVXVNNI instructionset64bit,X86 ,MOVBE instructionset64bit,X86 ,X86Serialize +instructionset64bit,X86 ,AVX512F +instructionset64bit,X86 ,AVX512F_VL +instructionset64bit,X86 ,AVX512BW +instructionset64bit,X86 ,AVX512BW_VL +instructionset64bit,X86 ,AVX512CD +instructionset64bit,X86 ,AVX512CD_VL +instructionset64bit,X86 ,AVX512DQ +instructionset64bit,X86 ,AVX512DQ_VL vectorinstructionset,X86 ,Vector128 vectorinstructionset,X86 ,Vector256 @@ -91,6 +107,15 @@ implication ,X86 ,AVXVNNI ,AVX2 implication ,X86 ,MOVBE ,SSE42 implication ,X86 ,X86Serialize, X86Base +implication ,X86 ,AVX512F, AVX2 +implication ,X86 ,AVX512F_VL,AVX512F +implication ,X86 ,AVX512CD, AVX512F +implication ,X86 ,AVX512CD_VL, AVX512F_VL +implication ,X86 ,AVX512BW, AVX512F +implication ,X86 ,AVX512BW_VL, AVX512F_VL +implication ,X86 ,AVX512DQ, AVX512F +implication ,X86 ,AVX512DQ_VL, AVX512F_VL + ; Definition of X64 instruction sets definearch ,X64 ,64Bit ,X64 @@ -142,6 +167,7 @@ instructionsetgroup ,x86-x64 ,X64 X86 ,sse2 instructionsetgroup ,x86-x64-v2 ,X64 X86 ,sse4.2 popcnt instructionsetgroup ,x86-x64-v3 ,X64 X86 ,x86-x64-v2 avx2 bmi bmi2 lzcnt movbe fma instructionsetgroup ,skylake ,X64 X86 ,x86-x64-v3 +instructionsetgroup ,x86-x64-v4 ,X64 X86 ,x86-x64-v3 avx512f avx512f_vl avx512bw avx512bw_vl avx512cd avx512cd_vl instructionsetgroup ,armv8-a ,ARM64 ,neon instructionsetgroup ,armv8.1-a ,ARM64 ,armv8-a lse crc rdma diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/HardwareIntrinsicHelpers.Aot.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/HardwareIntrinsicHelpers.Aot.cs 
index 7105c956162f5..0bfcca959c245 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/HardwareIntrinsicHelpers.Aot.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/HardwareIntrinsicHelpers.Aot.cs @@ -101,6 +101,14 @@ private static class XArchIntrinsicConstants public const int Lzcnt = 0x1000; public const int AvxVnni = 0x2000; public const int Movbe = 0x4000; + public const int Avx512f = 0x8000; + public const int Avx512f_vl = 0x10000; + public const int Avx512bw = 0x20000; + public const int Avx512bw_vl = 0x40000; + public const int Avx512cd = 0x80000; + public const int Avx512cd_vl = 0x100000; + public const int Avx512dq = 0x200000; + public const int Avx512dq_vl = 0x400000; public static int FromInstructionSet(InstructionSet instructionSet) { @@ -140,6 +148,22 @@ public static int FromInstructionSet(InstructionSet instructionSet) InstructionSet.X64_AVXVNNI_X64 => AvxVnni, InstructionSet.X64_MOVBE => Movbe, InstructionSet.X64_MOVBE_X64 => Movbe, + InstructionSet.X64_AVX512F => Avx512f, + InstructionSet.X64_AVX512F_X64 => Avx512f, + InstructionSet.X64_AVX512F_VL => Avx512f_vl, + InstructionSet.X64_AVX512F_VL_X64 => Avx512f_vl, + InstructionSet.X64_AVX512BW => Avx512bw, + InstructionSet.X64_AVX512BW_X64 => Avx512bw, + InstructionSet.X64_AVX512BW_VL => Avx512bw_vl, + InstructionSet.X64_AVX512BW_VL_X64 => Avx512bw_vl, + InstructionSet.X64_AVX512CD => Avx512cd, + InstructionSet.X64_AVX512CD_X64 => Avx512cd, + InstructionSet.X64_AVX512CD_VL => Avx512cd_vl, + InstructionSet.X64_AVX512CD_VL_X64 => Avx512cd_vl, + InstructionSet.X64_AVX512DQ => Avx512dq, + InstructionSet.X64_AVX512DQ_X64 => Avx512dq, + InstructionSet.X64_AVX512DQ_VL => Avx512dq_vl, + InstructionSet.X64_AVX512DQ_VL_X64 => Avx512dq_vl, // SSE and SSE2 are baseline ISAs - they're always available InstructionSet.X64_SSE => 0, diff --git a/src/coreclr/vm/amd64/AsmHelpers.asm b/src/coreclr/vm/amd64/AsmHelpers.asm index c62adef078ea2..c4501546d836e 100644 --- 
a/src/coreclr/vm/amd64/AsmHelpers.asm +++ b/src/coreclr/vm/amd64/AsmHelpers.asm @@ -649,6 +649,22 @@ LEAF_ENTRY xmmYmmStateSupport, _TEXT ret LEAF_END xmmYmmStateSupport, _TEXT +;; extern "C" DWORD __stdcall avx512StateSupport(); +LEAF_ENTRY avx512StateSupport, _TEXT + mov ecx, 0 ; Specify xcr0 + xgetbv ; result in EDX:EAX + and eax, 0E6H + cmp eax, 0E6H ; check OS has enabled XMM, YMM and ZMM state support + jne not_supported + mov eax, 1 + jmp done + not_supported: + mov eax, 0 + done: + ret +LEAF_END avx512StateSupport, _TEXT + + ; EXTERN_C void moveOWord(LPVOID* src, LPVOID* target); ; diff --git a/src/coreclr/vm/amd64/unixstubs.cpp b/src/coreclr/vm/amd64/unixstubs.cpp index 09d2568a9273b..8fdcfd15a9b3e 100644 --- a/src/coreclr/vm/amd64/unixstubs.cpp +++ b/src/coreclr/vm/amd64/unixstubs.cpp @@ -48,6 +48,18 @@ extern "C" return ((eax & 0x06) == 0x06) ? 1 : 0; } + DWORD avx512StateSupport() + { + DWORD eax; + __asm(" xgetbv\n" \ + : "=a"(eax) /*output in eax*/\ + : "c"(0) /*inputs - 0 in ecx*/\ + : "edx" /* registers that are clobbered*/ + ); + // check OS has enabled XMM, YMM and ZMM state support + return ((eax & 0x0E6) == 0x0E6) ? 
1 : 0; + } + void STDMETHODCALLTYPE JIT_ProfilerEnterLeaveTailcallStub(UINT_PTR ProfilerHandle) { } diff --git a/src/coreclr/vm/cgensys.h b/src/coreclr/vm/cgensys.h index 868e9cf26bf3f..5e378c485332d 100644 --- a/src/coreclr/vm/cgensys.h +++ b/src/coreclr/vm/cgensys.h @@ -101,6 +101,7 @@ extern "C" void __stdcall __cpuid(int cpuInfo[4], int function_id); extern "C" void __stdcall __cpuidex(int cpuInfo[4], int function_id, int subFunction_id); #endif // TARGET_UNIX extern "C" DWORD __stdcall xmmYmmStateSupport(); +extern "C" DWORD __stdcall avx512StateSupport(); #endif const int CPUID_EAX = 0; diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index 6c2e15fe7a82f..ddf189c013f6a 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -1289,6 +1289,36 @@ bool DoesOSSupportAVX() return TRUE; } +bool DoesOSSupportAVX512() +{ + LIMITED_METHOD_CONTRACT; + +#ifndef TARGET_UNIX + // On Windows we have an api(GetEnabledXStateFeatures) to check if AVX512 is supported + typedef DWORD64 (WINAPI *PGETENABLEDXSTATEFEATURES)(); + PGETENABLEDXSTATEFEATURES pfnGetEnabledXStateFeatures = NULL; + + HMODULE hMod = WszLoadLibraryEx(WINDOWS_KERNEL32_DLLNAME_W, NULL, LOAD_LIBRARY_SEARCH_SYSTEM32); + if(hMod == NULL) + return FALSE; + + pfnGetEnabledXStateFeatures = (PGETENABLEDXSTATEFEATURES)GetProcAddress(hMod, "GetEnabledXStateFeatures"); + + if (pfnGetEnabledXStateFeatures == NULL) + { + return FALSE; + } + + DWORD64 FeatureMask = pfnGetEnabledXStateFeatures(); + if ((FeatureMask & XSTATE_MASK_AVX512) == 0) + { + return FALSE; + } +#endif // !TARGET_UNIX + + return TRUE; +} + #endif // defined(TARGET_X86) || defined(TARGET_AMD64) #ifdef TARGET_ARM64 @@ -1377,7 +1407,31 @@ void EEJitManager::SetCpuInfo() // CORJIT_FLAG_USE_AVXVNNI if the following feature bit is set (input EAX of 0x07 and input ECX of 1): // CORJIT_FLAG_USE_AVX2 // AVXVNNI - EAX bit 4 - // CORJIT_FLAG_USE_AVX_512 is not currently set, but defined so that it can be used in future 
without + // CORJIT_FLAG_USE_AVX_512F if the following feature bit is set (input EAX of 0x07 and input ECX of 0), and avx512StateSupport returns 1: + // CORJIT_FLAG_USE_AVX2 + // AVX512F - EBX bit 16 + // XGETBV - XCR0[7:5] 111b + // CORJIT_FLAG_USE_AVX_512F_VL if the following feature bit is set (input EAX of 0x07 and input ECX of 0): + // CORJIT_FLAG_USE_AVX512F + // AVX512VL - EBX bit 31 + // CORJIT_FLAG_USE_AVX_512BW if the following feature bit is set (input EAX of 0x07 and input ECX of 0): + // CORJIT_FLAG_USE_AVX512F + // AVX512BW - EBX bit 30 + // CORJIT_FLAG_USE_AVX_512BW_VL if the following feature bit is set (input EAX of 0x07 and input ECX of 0): + // CORJIT_FLAG_USE_AVX512F_VL + // CORJIT_FLAG_USE_AVX_512BW + // CORJIT_FLAG_USE_AVX_512CD if the following feature bit is set (input EAX of 0x07 and input ECX of 0): + // CORJIT_FLAG_USE_AVX512F + // AVX512CD - EBX bit 28 + // CORJIT_FLAG_USE_AVX_512CD_VL if the following feature bit is set (input EAX of 0x07 and input ECX of 0): + // CORJIT_FLAG_USE_AVX512F_VL + // CORJIT_FLAG_USE_AVX_512CD + // CORJIT_FLAG_USE_AVX_512DQ if the following feature bit is set (input EAX of 0x07 and input ECX of 0): + // CORJIT_FLAG_USE_AVX512F + // AVX512DQ - EBX bit 17 + // CORJIT_FLAG_USE_AVX_512DQ_VL if the following feature bit is set (input EAX of 0x07 and input ECX of 0): + // CORJIT_FLAG_USE_AVX512F_VL + // CORJIT_FLAG_USE_AVX_512DQ // CORJIT_FLAG_USE_BMI1 if the following feature bit is set (input EAX of 0x07 and input ECX of 0): // BMI1 - EBX bit 3 // CORJIT_FLAG_USE_BMI2 if the following feature bit is set (input EAX of 0x07 and input ECX of 0): @@ -1460,6 +1514,48 @@ void EEJitManager::SetCpuInfo() { CPUCompileFlags.Set(InstructionSet_AVX2); + if (DoesOSSupportAVX512() && (avx512StateSupport() == 1)) // XGETBV XCR0[7:5] == 111 + { + if ((cpuidInfo[EBX] & (1 << 16)) != 0) // AVX512F + { + CPUCompileFlags.Set(InstructionSet_AVX512F); + + bool isAVX512_VLSupported = false; + if ((cpuidInfo[EBX] & (1 << 31)) != 0) //
AVX512VL + { + CPUCompileFlags.Set(InstructionSet_AVX512F_VL); + isAVX512_VLSupported = true; + } + + if ((cpuidInfo[EBX] & (1 << 30)) != 0) // AVX512BW + { + CPUCompileFlags.Set(InstructionSet_AVX512BW); + if (isAVX512_VLSupported) // AVX512BW_VL + { + CPUCompileFlags.Set(InstructionSet_AVX512BW_VL); + } + } + + if ((cpuidInfo[EBX] & (1 << 28)) != 0) // AVX512CD + { + CPUCompileFlags.Set(InstructionSet_AVX512CD); + if (isAVX512_VLSupported) // AVX512CD_VL + { + CPUCompileFlags.Set(InstructionSet_AVX512CD_VL); + } + } + + if ((cpuidInfo[EBX] & (1 << 17)) != 0) // AVX512DQ + { + CPUCompileFlags.Set(InstructionSet_AVX512DQ); + if (isAVX512_VLSupported) // AVX512DQ_VL + { + CPUCompileFlags.Set(InstructionSet_AVX512DQ_VL); + } + } + } + } + __cpuidex(cpuidInfo, 0x00000007, 0x00000001); if ((cpuidInfo[EAX] & (1 << 4)) != 0) // AVX-VNNI { @@ -1615,6 +1711,46 @@ void EEJitManager::SetCpuInfo() CPUCompileFlags.Clear(InstructionSet_AVX2); } + if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512F)) + { + CPUCompileFlags.Clear(InstructionSet_AVX512F); + } + + if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512F_VL)) + { + CPUCompileFlags.Clear(InstructionSet_AVX512F_VL); + } + + if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512BW)) + { + CPUCompileFlags.Clear(InstructionSet_AVX512BW); + } + + if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512BW_VL)) + { + CPUCompileFlags.Clear(InstructionSet_AVX512BW_VL); + } + + if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512CD)) + { + CPUCompileFlags.Clear(InstructionSet_AVX512CD); + } + + if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512CD_VL)) + { + CPUCompileFlags.Clear(InstructionSet_AVX512CD_VL); + } + + if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512DQ)) + { + CPUCompileFlags.Clear(InstructionSet_AVX512DQ); + } + + if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512DQ_VL)) + { + CPUCompileFlags.Clear(InstructionSet_AVX512DQ_VL); 
+ } + if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVXVNNI)) { CPUCompileFlags.Clear(InstructionSet_AVXVNNI); @@ -1691,6 +1827,7 @@ void EEJitManager::SetCpuInfo() { CPUCompileFlags.Clear(InstructionSet_X86Serialize); } + #elif defined(TARGET_ARM64) if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableHWIntrinsic)) { diff --git a/src/coreclr/vm/i386/cgenx86.cpp b/src/coreclr/vm/i386/cgenx86.cpp index 356dab119f78b..811c84a140d32 100644 --- a/src/coreclr/vm/i386/cgenx86.cpp +++ b/src/coreclr/vm/i386/cgenx86.cpp @@ -1106,6 +1106,31 @@ extern "C" DWORD __stdcall xmmYmmStateSupport() } #pragma warning(pop) +#pragma warning(push) +#pragma warning(disable: 4035) +extern "C" DWORD __stdcall avx512StateSupport() +{ + // No CONTRACT + STATIC_CONTRACT_NOTHROW; + STATIC_CONTRACT_GC_NOTRIGGER; + + __asm + { + mov ecx, 0 ; ECX = 0 selects XCR0 for xgetbv + xgetbv ; result in EDX:EAX + and eax, 0E6H + cmp eax, 0E6H ; check OS has enabled XMM, YMM and ZMM state support (XCR0 bits 1, 2 and 7:5) + jne not_supported + mov eax, 1 + jmp done + not_supported: + mov eax, 0 + done: + } +} +#pragma warning(pop) + + #else // !TARGET_UNIX void __cpuid(int cpuInfo[4], int function_id) @@ -1142,6 +1167,18 @@ extern "C" DWORD __stdcall xmmYmmStateSupport() return ((eax & 0x06) == 0x06) ? 1 : 0; } +extern "C" DWORD __stdcall avx512StateSupport() +{ + DWORD eax; + __asm(" xgetbv\n" \ + : "=a"(eax) /*output in eax*/\ + : "c"(0) /*inputs - 0 in ecx*/\ + : "edx" /* registers that are clobbered*/ + ); + // check OS has enabled XMM, YMM and ZMM state support (XCR0 bits 1, 2 and 7:5) + return ((eax & 0x0E6) == 0x0E6) ? 1 : 0; +} + #endif // !TARGET_UNIX void UMEntryThunkCode::Encode(UMEntryThunkCode *pEntryThunkCodeRX, BYTE* pTargetCode, void* pvSecretParam)