Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for --instruction-set:native #87865

Merged
merged 17 commits into from
Jul 20, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions src/coreclr/nativeaot/Runtime/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ set(COMMON_RUNTIME_SOURCES
${GC_DIR}/handletablescan.cpp
${GC_DIR}/objecthandle.cpp
${GC_DIR}/softwarewritewatch.cpp

${CLR_SRC_NATIVE_DIR}/minipal/cpufeatures.c
)

set(SERVER_GC_SOURCES
Expand Down Expand Up @@ -118,10 +120,6 @@ if (WIN32)
list(APPEND FULL_RUNTIME_SOURCES windows/CoffNativeCodeManager.cpp)

set(ASM_SUFFIX asm)

if (CLR_CMAKE_TARGET_ARCH_I386 OR CLR_CMAKE_TARGET_ARCH_AMD64)
set(RUNTIME_SOURCES_ARCH_ASM ${ARCH_SOURCES_DIR}/GC.${ASM_SUFFIX})
endif()
else()

include_directories(unix)
Expand Down
3 changes: 2 additions & 1 deletion src/coreclr/nativeaot/Runtime/MiscHelpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
#include "GCMemoryHelpers.inl"
#include "yieldprocessornormalized.h"
#include "RhConfig.h"
#include <minipal/cpufeatures.h>

COOP_PINVOKE_HELPER(void, RhDebugBreak, ())
{
Expand Down Expand Up @@ -411,6 +412,6 @@ COOP_PINVOKE_HELPER(uint32_t, RhGetKnobValues, (char *** pResultKeys, char *** p
#if defined(TARGET_X86) || defined(TARGET_AMD64)
EXTERN_C NATIVEAOT_API void __cdecl RhCpuIdEx(int* cpuInfo, int functionId, int subFunctionId)
{
__cpuidex(cpuInfo, functionId, subFunctionId);
minipal_cpuidex(cpuInfo, functionId, subFunctionId);
}
#endif
26 changes: 0 additions & 26 deletions src/coreclr/nativeaot/Runtime/PalRedhawk.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
#include <sal.h>
#include <stdarg.h>
#include "gcenv.structs.h" // CRITICAL_SECTION
#include "IntrinsicConstants.h"
#include "PalRedhawkCommon.h"

#ifndef PAL_REDHAWK_INCLUDED
Expand Down Expand Up @@ -780,31 +779,6 @@ REDHAWK_PALIMPORT char* PalCopyTCharAsChar(const TCHAR* toCopy);
REDHAWK_PALIMPORT int32_t __cdecl _stricmp(const char *string1, const char *string2);
#endif // TARGET_UNIX

#if defined(HOST_X86) || defined(HOST_AMD64)

#ifdef TARGET_UNIX
// MSVC directly defines intrinsics for __cpuid and __cpuidex matching the below signatures
// We define matching signatures for use on Unix platforms.
//
// IMPORTANT: Unlike MSVC, Unix does not explicitly zero ECX for __cpuid

REDHAWK_PALIMPORT void __cpuid(int cpuInfo[4], int function_id);
REDHAWK_PALIMPORT void __cpuidex(int cpuInfo[4], int function_id, int subFunction_id);
#else
#include <intrin.h>
#endif

REDHAWK_PALIMPORT uint32_t REDHAWK_PALAPI xmmYmmStateSupport();
REDHAWK_PALIMPORT uint32_t REDHAWK_PALAPI avx512StateSupport();
REDHAWK_PALIMPORT bool REDHAWK_PALAPI PalIsAvxEnabled();
REDHAWK_PALIMPORT bool REDHAWK_PALAPI PalIsAvx512Enabled();

#endif // defined(HOST_X86) || defined(HOST_AMD64)

#if defined(HOST_ARM64)
REDHAWK_PALIMPORT void REDHAWK_PALAPI PAL_GetCpuCapabilityFlags(int* flags);
#endif //defined(HOST_ARM64)

#include "PalRedhawkInline.h"

#endif // !PAL_REDHAWK_INCLUDED
36 changes: 0 additions & 36 deletions src/coreclr/nativeaot/Runtime/amd64/GC.asm

This file was deleted.

1 change: 0 additions & 1 deletion src/coreclr/nativeaot/Runtime/amd64/GcProbe.asm
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,6 @@ RuntimeInstance__ShouldHijackLoopForGcStress equ ?ShouldHijackLoopForGcStress@Ru
EXTERN RuntimeInstance__ShouldHijackLoopForGcStress : PROC

EXTERN g_fGcStressStarted : DWORD
EXTERN g_fHasFastFxsave : BYTE

;;
;; INVARIANT: Don't trash the argument registers, the binder codegen depends on this.
Expand Down
202 changes: 2 additions & 200 deletions src/coreclr/nativeaot/Runtime/startup.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "stressLog.h"
#include "RestrictedCallouts.h"
#include "yieldprocessornormalized.h"
#include <minipal/cpufeatures.h>

#ifdef FEATURE_PERFTRACING
#include "EventPipeInterface.h"
Expand All @@ -48,9 +49,6 @@ static bool DetectCPUFeatures();

extern RhConfig * g_pRhConfig;

EXTERN_C bool g_fHasFastFxsave;
bool g_fHasFastFxsave = false;

CrstStatic g_ThunkPoolLock;

#if defined(HOST_X86) || defined(HOST_AMD64) || defined(HOST_ARM64)
Expand Down Expand Up @@ -180,203 +178,7 @@ static bool InitDLL(HANDLE hPalInstance)
bool DetectCPUFeatures()
{
#if defined(HOST_X86) || defined(HOST_AMD64) || defined(HOST_ARM64)

#if defined(HOST_X86) || defined(HOST_AMD64)

int cpuidInfo[4];

const int CPUID_EAX = 0;
const int CPUID_EBX = 1;
const int CPUID_ECX = 2;
const int CPUID_EDX = 3;

__cpuid(cpuidInfo, 0x00000000);
uint32_t maxCpuId = static_cast<uint32_t>(cpuidInfo[CPUID_EAX]);

if (maxCpuId >= 1)
{
__cpuid(cpuidInfo, 0x00000001);

const int requiredBaselineEdxFlags = (1 << 25) // SSE
| (1 << 26); // SSE2

if ((cpuidInfo[CPUID_EDX] & requiredBaselineEdxFlags) == requiredBaselineEdxFlags)
{
g_cpuFeatures |= XArchIntrinsicConstants_VectorT128;

if ((cpuidInfo[CPUID_ECX] & (1 << 25)) != 0) // AESNI
{
g_cpuFeatures |= XArchIntrinsicConstants_Aes;
}

if ((cpuidInfo[CPUID_ECX] & (1 << 1)) != 0) // PCLMULQDQ
{
g_cpuFeatures |= XArchIntrinsicConstants_Pclmulqdq;
}

if ((cpuidInfo[CPUID_ECX] & (1 << 0)) != 0) // SSE3
{
g_cpuFeatures |= XArchIntrinsicConstants_Sse3;

if ((cpuidInfo[CPUID_ECX] & (1 << 9)) != 0) // SSSE3
{
g_cpuFeatures |= XArchIntrinsicConstants_Ssse3;

if ((cpuidInfo[CPUID_ECX] & (1 << 19)) != 0) // SSE4.1
{
g_cpuFeatures |= XArchIntrinsicConstants_Sse41;

if ((cpuidInfo[CPUID_ECX] & (1 << 20)) != 0) // SSE4.2
{
g_cpuFeatures |= XArchIntrinsicConstants_Sse42;

if ((cpuidInfo[CPUID_ECX] & (1 << 22)) != 0) // MOVBE
{
g_cpuFeatures |= XArchIntrinsicConstants_Movbe;
}

if ((cpuidInfo[CPUID_ECX] & (1 << 23)) != 0) // POPCNT
{
g_cpuFeatures |= XArchIntrinsicConstants_Popcnt;
}

const int requiredAvxEcxFlags = (1 << 27) // OSXSAVE
| (1 << 28); // AVX

if ((cpuidInfo[CPUID_ECX] & requiredAvxEcxFlags) == requiredAvxEcxFlags)
{
if (PalIsAvxEnabled() && (xmmYmmStateSupport() == 1)) // XGETBV == 11
{
g_cpuFeatures |= XArchIntrinsicConstants_Avx;

if ((cpuidInfo[CPUID_ECX] & (1 << 12)) != 0) // FMA
{
g_cpuFeatures |= XArchIntrinsicConstants_Fma;
}

if (maxCpuId >= 0x07)
{
__cpuidex(cpuidInfo, 0x00000007, 0x00000000);

if ((cpuidInfo[CPUID_EBX] & (1 << 5)) != 0) // AVX2
{
g_cpuFeatures |= XArchIntrinsicConstants_Avx2;
g_cpuFeatures |= XArchIntrinsicConstants_VectorT256;

if (PalIsAvx512Enabled() && (avx512StateSupport() == 1)) // XGETBV XCR0[7:5] == 111
{
if ((cpuidInfo[CPUID_EBX] & (1 << 16)) != 0) // AVX512F
{
g_cpuFeatures |= XArchIntrinsicConstants_Avx512f;
g_cpuFeatures |= XArchIntrinsicConstants_VectorT512;

bool isAVX512_VLSupported = false;
if ((cpuidInfo[CPUID_EBX] & (1 << 31)) != 0) // AVX512VL
{
g_cpuFeatures |= XArchIntrinsicConstants_Avx512f_vl;
isAVX512_VLSupported = true;
}

if ((cpuidInfo[CPUID_EBX] & (1 << 30)) != 0) // AVX512BW
{
g_cpuFeatures |= XArchIntrinsicConstants_Avx512bw;
if (isAVX512_VLSupported) // AVX512BW_VL
{
g_cpuFeatures |= XArchIntrinsicConstants_Avx512bw_vl;
}
}

if ((cpuidInfo[CPUID_EBX] & (1 << 28)) != 0) // AVX512CD
{
g_cpuFeatures |= XArchIntrinsicConstants_Avx512cd;
if (isAVX512_VLSupported) // AVX512CD_VL
{
g_cpuFeatures |= XArchIntrinsicConstants_Avx512cd_vl;
}
}

if ((cpuidInfo[CPUID_EBX] & (1 << 17)) != 0) // AVX512DQ
{
g_cpuFeatures |= XArchIntrinsicConstants_Avx512dq;
if (isAVX512_VLSupported) // AVX512DQ_VL
{
g_cpuFeatures |= XArchIntrinsicConstants_Avx512dq_vl;
}
}

if ((cpuidInfo[CPUID_ECX] & (1 << 1)) != 0) // AVX512VBMI
{
g_cpuFeatures |= XArchIntrinsicConstants_Avx512Vbmi;
if (isAVX512_VLSupported) // AVX512VBMI_VL
{
g_cpuFeatures |= XArchIntrinsicConstants_Avx512Vbmi_vl;
}
}
}
}

__cpuidex(cpuidInfo, 0x00000007, 0x00000001);

if ((cpuidInfo[CPUID_EAX] & (1 << 4)) != 0) // AVX-VNNI
{
g_cpuFeatures |= XArchIntrinsicConstants_AvxVnni;
}
}
}
}
}
}
}
}
}
}

if (maxCpuId >= 0x07)
{
__cpuidex(cpuidInfo, 0x00000007, 0x00000000);

if ((cpuidInfo[CPUID_EBX] & (1 << 3)) != 0) // BMI1
{
g_cpuFeatures |= XArchIntrinsicConstants_Bmi1;
}

if ((cpuidInfo[CPUID_EBX] & (1 << 8)) != 0) // BMI2
{
g_cpuFeatures |= XArchIntrinsicConstants_Bmi2;
}

if ((cpuidInfo[CPUID_EDX] & (1 << 14)) != 0)
{
g_cpuFeatures |= XArchIntrinsicConstants_Serialize; // SERIALIZE
}
}
}

__cpuid(cpuidInfo, 0x80000000);
uint32_t maxCpuIdEx = static_cast<uint32_t>(cpuidInfo[CPUID_EAX]);

if (maxCpuIdEx >= 0x80000001)
{
__cpuid(cpuidInfo, 0x80000001);

if ((cpuidInfo[CPUID_ECX] & (1 << 5)) != 0) // LZCNT
{
g_cpuFeatures |= XArchIntrinsicConstants_Lzcnt;
}

#ifdef HOST_AMD64
// AMD has a "fast" mode for fxsave/fxrstor, which omits the saving of xmm registers. The OS will enable this mode
// if it is supported. So if we continue to use fxsave/fxrstor, we must manually save/restore the xmm registers.
// fxsr_opt is bit 25 of CPUID_EDX
if ((cpuidInfo[CPUID_EDX] & (1 << 25)) != 0)
g_fHasFastFxsave = true;
#endif
}
#endif // HOST_X86 || HOST_AMD64

#if defined(HOST_ARM64)
PAL_GetCpuCapabilityFlags (&g_cpuFeatures);
#endif
g_cpuFeatures = minipal_getcpufeatures();

if ((g_cpuFeatures & g_requiredCpuFeatures) != g_requiredCpuFeatures)
{
Expand Down
Loading