From a1f8ea1536b6ab7f802252ba7e2bc83b055afe46 Mon Sep 17 00:00:00 2001 From: Kim Walisch Date: Fri, 21 Jun 2024 18:45:08 +0200 Subject: [PATCH] Move x86 cpuid code from cpuid.hpp to src/arch/x86/cpuid.cpp --- CMakeLists.txt | 8 ++ ChangeLog | 3 +- cmake/multiarch_avx512_vpopcnt.cmake | 6 +- cmake/x86_cpuid.cmake | 29 +++++++ include/cpu_supports_avx512_bmi2.hpp | 72 +--------------- include/cpu_supports_popcnt.hpp | 26 +++--- include/cpuid.hpp | 55 ------------ src/arch/x86/cpuid.cpp | 124 +++++++++++++++++++++++++++ 8 files changed, 181 insertions(+), 142 deletions(-) create mode 100644 cmake/x86_cpuid.cmake delete mode 100644 include/cpuid.hpp create mode 100644 src/arch/x86/cpuid.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 26e6aa14..8abe3e68 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -154,6 +154,14 @@ else() set(LIB_SRC ${LIB_SRC} src/gourdon/AC.cpp) endif() +# On x86 CPUs compile cpuid.cpp ###################################### + +include("${PROJECT_SOURCE_DIR}/cmake/x86_cpuid.cmake") + +if(x86_cpuid) + set(LIB_SRC ${LIB_SRC} src/arch/x86/cpuid.cpp) +endif() + # Enable __float128 support (requires libquadmath) ################### if(WITH_FLOAT128) diff --git a/ChangeLog b/ChangeLog index 64969251..c6d73134 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,6 @@ -Changes in primecount-7.14, 2024-06-20 +Changes in primecount-7.14, 2024-06-21 +* Move x86 cpuid code from cpuid.hpp to src/arch/x86/cpuid.cpp. * int128_t.hpp: Rename namespace port to pstd (portable std namespace). * popcnt.hpp: Improve GCC performance on x86 CPUs. * Sieve.hpp: Tune AVX512 code. 
diff --git a/cmake/multiarch_avx512_vpopcnt.cmake b/cmake/multiarch_avx512_vpopcnt.cmake index 92e000ed..691e9760 100644 --- a/cmake/multiarch_avx512_vpopcnt.cmake +++ b/cmake/multiarch_avx512_vpopcnt.cmake @@ -7,7 +7,7 @@ include(CheckCXXSourceCompiles) include(CMakePushCheckState) cmake_push_check_state() -set(CMAKE_REQUIRED_INCLUDES "${PROJECT_SOURCE_DIR}/include") +set(CMAKE_REQUIRED_INCLUDES "${PROJECT_SOURCE_DIR}") check_cxx_source_compiles(" // GCC/Clang function multiversioning for AVX512 is not needed if @@ -20,7 +20,7 @@ check_cxx_source_compiles(" Error: AVX512 BMI2 multiarch not needed! #endif - #include + #include #include #include @@ -65,7 +65,7 @@ check_cxx_source_compiles(" uint64_t cnt = 0; Sieve sieve; - if (cpu_supports_avx512_bmi2) + if (primecount::has_cpuid_avx512_bmi2()) cnt = sieve.count_avx512_bmi2(&array[0], 10); else cnt = sieve.count_default(&array[0], 10); diff --git a/cmake/x86_cpuid.cmake b/cmake/x86_cpuid.cmake new file mode 100644 index 00000000..fe99d065 --- /dev/null +++ b/cmake/x86_cpuid.cmake @@ -0,0 +1,29 @@ +# On x86 CPUs we need to enable the use of cpuid.cpp. +# If cpuid.cpp compiles we assume it is an x86 CPU.
+ +include(CheckCXXSourceCompiles) +include(CMakePushCheckState) + +cmake_push_check_state() +set(CMAKE_REQUIRED_INCLUDES "${PROJECT_SOURCE_DIR}") + +check_cxx_source_compiles(" + #include <src/arch/x86/cpuid.cpp> + #include <iostream> + + int main() + { + int abcd[4]; + primecount::run_cpuid(1, 0, abcd); + int ecx = abcd[2]; + + if ((ecx & (1 << 23)) == (1 << 23)) + std::cout << \"CPU supports POPCNT!\" << std::endl; + else + std::cout << \"CPU does not support POPCNT!\" << std::endl; + + return 0; + } +" x86_cpuid) + +cmake_pop_check_state() diff --git a/include/cpu_supports_avx512_bmi2.hpp b/include/cpu_supports_avx512_bmi2.hpp index 3faea1be..c237842b 100644 --- a/include/cpu_supports_avx512_bmi2.hpp +++ b/include/cpu_supports_avx512_bmi2.hpp @@ -11,80 +11,16 @@ #ifndef CPU_SUPPORTS_AVX512_BMI2_HPP #define CPU_SUPPORTS_AVX512_BMI2_HPP -#include -#include +namespace primecount { -#if defined(_MSC_VER) - #include -#endif - -// CPUID bits documentation: -// https://en.wikipedia.org/wiki/CPUID - -// %ebx bit flags -#define bit_BMI2 (1 << 8) -#define bit_AVX512F (1 << 16) +bool has_cpuid_avx512_bmi2(); -// %ecx bit flags -#define bit_AVX512_VPOPCNTDQ (1 << 14) - -// xgetbv bit flags -#define XSTATE_SSE (1 << 1) -#define XSTATE_YMM (1 << 2) -#define XSTATE_ZMM (7 << 5) +} // namespace namespace { -// Get Value of Extended Control Register -inline uint64_t get_xcr0() -{ -#if defined(_MSC_VER) - return _xgetbv(0); -#else - uint32_t eax; - uint32_t edx; - - __asm__ ("xgetbv" : "=a"(eax), "=d"(edx) : "c"(0)); - return eax | (uint64_t(edx) << 32); -#endif -} - -inline bool run_cpuid_avx512_bmi2() -{ - int abcd[4]; - - run_cpuid(1, 0, abcd); - - int osxsave_mask = (1 << 27); - - // Ensure OS supports extended processor state management - if ((abcd[2] & osxsave_mask) != osxsave_mask) - return false; - - uint64_t ymm_mask = XSTATE_SSE | XSTATE_YMM; - uint64_t zmm_mask = XSTATE_SSE | XSTATE_YMM | XSTATE_ZMM; - uint64_t xcr0 = get_xcr0(); - - // Check AVX OS support - if ((xcr0 & ymm_mask) != ymm_mask) - return
false; - - // Check AVX512 OS support - if ((xcr0 & zmm_mask) != zmm_mask) - return false; - - run_cpuid(7, 0, abcd); - - if ((abcd[1] & bit_BMI2) != bit_BMI2) - return false; - - // AVX512F, AVX512VPOPCNTDQ - return ((abcd[1] & bit_AVX512F) == bit_AVX512F && - (abcd[2] & bit_AVX512_VPOPCNTDQ) == bit_AVX512_VPOPCNTDQ); -} - /// Initialized at startup -bool cpu_supports_avx512_bmi2 = run_cpuid_avx512_bmi2(); +bool cpu_supports_avx512_bmi2 = primecount::has_cpuid_avx512_bmi2(); } // namespace diff --git a/include/cpu_supports_popcnt.hpp b/include/cpu_supports_popcnt.hpp index a504fd03..d4d27a05 100644 --- a/include/cpu_supports_popcnt.hpp +++ b/include/cpu_supports_popcnt.hpp @@ -11,7 +11,9 @@ #ifndef CPU_SUPPORTS_POPCNT_HPP #define CPU_SUPPORTS_POPCNT_HPP -// Enable CPUID on x86 and x86-64 CPUs +// Enable CPUID for POPCNT on x86 and x86-64 CPUs. +// This is required because not all x86 and x86-64 CPUs +// support the POPCNT instruction. #if defined(__x86_64__) || \ defined(__i386__) || \ defined(_M_X64) || \ @@ -23,6 +25,7 @@ // such as -mavx or -march=native. #if defined(__POPCNT__) #define HAS_POPCNT + // The MSVC compiler does not support a POPCNT macro, but if the user // compiles with e.g. /arch:AVX or /arch:AVX512 then MSVC defines // the __AVX__ macro and POPCNT is also supported. 
@@ -31,29 +34,22 @@ #endif #if !defined(HAS_POPCNT) - -#include #define ENABLE_CPUID_POPCNT -namespace { +namespace primecount { -inline bool run_cpuid_supports_popcnt() -{ - int abcd[4]; - run_cpuid(1, 0, abcd); +bool has_cpuid_popcnt(); - // %ecx POPCNT bit flag - // https://en.wikipedia.org/wiki/CPUID - int bit_POPCNT = 1 << 23; - return (abcd[2] & bit_POPCNT) == bit_POPCNT; -} +} // namespace + +namespace { /// Initialized at startup -bool cpu_supports_popcnt = run_cpuid_supports_popcnt(); +bool cpu_supports_popcnt = primecount::has_cpuid_popcnt(); } // namespace #endif // !defined(HAS_POPCNT) -#endif // CPUID +#endif // x86 or x86-64 #endif diff --git a/include/cpuid.hpp b/include/cpuid.hpp deleted file mode 100644 index 1f45c587..00000000 --- a/include/cpuid.hpp +++ /dev/null @@ -1,55 +0,0 @@ -/// -/// @file cpuid.hpp -/// @brief CPUID for x86 and x86-64 CPUs. -/// -/// Copyright (C) 2024 Kim Walisch, -/// -/// This file is distributed under the BSD License. See the COPYING -/// file in the top level directory. -/// - -#ifndef CPUID_HPP -#define CPUID_HPP - -#if defined(_MSC_VER) - #include -#endif - -namespace { - -inline void run_cpuid(int eax, int ecx, int* abcd) -{ -#if defined(_MSC_VER) - __cpuidex(abcd, eax, ecx); -#else - int ebx = 0; - int edx = 0; - - #if defined(__i386__) && \ - defined(__PIC__) - // In case of PIC under 32-bit EBX cannot be clobbered - __asm__ ("movl %%ebx, %%edi;" - "cpuid;" - "xchgl %%ebx, %%edi;" - : "+a" (eax), - "=D" (ebx), - "+c" (ecx), - "=d" (edx)); - #else - __asm__ ("cpuid" - : "+a" (eax), - "+b" (ebx), - "+c" (ecx), - "=d" (edx)); - #endif - - abcd[0] = eax; - abcd[1] = ebx; - abcd[2] = ecx; - abcd[3] = edx; -#endif -} - -} // namespace - -#endif diff --git a/src/arch/x86/cpuid.cpp b/src/arch/x86/cpuid.cpp new file mode 100644 index 00000000..cb99734a --- /dev/null +++ b/src/arch/x86/cpuid.cpp @@ -0,0 +1,124 @@ +/// +/// @file cpuid.cpp +/// @brief CPUID for x86 and x86-64 CPUs. 
+/// +/// Copyright (C) 2024 Kim Walisch, +/// +/// This file is distributed under the BSD License. See the COPYING +/// file in the top level directory. +/// + +#include + +#if defined(_MSC_VER) + #include + #include +#endif + +// CPUID bits documentation: +// https://en.wikipedia.org/wiki/CPUID + +// %ebx bit flags (tested after CPUID leaf 7, sub-leaf 0) +#define bit_BMI2 (1 << 8) +#define bit_AVX512F (1 << 16) + +// %ecx bit flags (VPOPCNTDQ is tested after leaf 7, POPCNT after leaf 1) +#define bit_AVX512_VPOPCNTDQ (1 << 14) +#define bit_POPCNT (1 << 23) + +// XCR0 bit flags (applied to the value read with xgetbv) +#define XSTATE_SSE (1 << 1) +#define XSTATE_YMM (1 << 2) +#define XSTATE_ZMM (7 << 5) + +namespace primecount { + +void run_cpuid(int eax, int ecx, int* abcd) +{ +#if defined(_MSC_VER) + __cpuidex(abcd, eax, ecx); +#else + int ebx = 0; + int edx = 0; + + #if defined(__i386__) && \ + defined(__PIC__) + // 32-bit PIC code must not clobber EBX: save it in EDI, run cpuid, then swap so EDI holds cpuid's EBX result and EBX is restored + __asm__ ("movl %%ebx, %%edi;" + "cpuid;" + "xchgl %%ebx, %%edi;" + : "+a" (eax), + "=D" (ebx), + "+c" (ecx), + "=d" (edx)); + #else + __asm__ ("cpuid" + : "+a" (eax), + "+b" (ebx), + "+c" (ecx), + "=d" (edx)); + #endif + + abcd[0] = eax; + abcd[1] = ebx; + abcd[2] = ecx; + abcd[3] = edx; +#endif +} + +bool has_cpuid_popcnt() +{ + int abcd[4]; + run_cpuid(1, 0, abcd); + return (abcd[2] & bit_POPCNT) == bit_POPCNT; +} + +// Read extended control register 0 (XCR0) using xgetbv +uint64_t get_xcr0() +{ +#if defined(_MSC_VER) + return _xgetbv(0); +#else + uint32_t eax; + uint32_t edx; + + __asm__ ("xgetbv" : "=a"(eax), "=d"(edx) : "c"(0)); + return eax | (uint64_t(edx) << 32); +#endif +} + +bool has_cpuid_avx512_bmi2() +{ + int abcd[4]; + + run_cpuid(1, 0, abcd); + + int osxsave_mask = (1 << 27); + + // Ensure OS supports extended processor state management (OSXSAVE: CPUID leaf 1, %ecx bit 27) + if ((abcd[2] & osxsave_mask) != osxsave_mask) + return false; + + uint64_t ymm_mask = XSTATE_SSE | XSTATE_YMM; + uint64_t zmm_mask = XSTATE_SSE | XSTATE_YMM | XSTATE_ZMM; + uint64_t xcr0 = get_xcr0(); + + // Check AVX OS support (SSE and YMM state bits set in XCR0) + if ((xcr0 & ymm_mask) != ymm_mask) + return false; + + // Check
AVX512 OS support (SSE, YMM and ZMM state bits set in XCR0) + if ((xcr0 & zmm_mask) != zmm_mask) + return false; + + run_cpuid(7, 0, abcd); + + if ((abcd[1] & bit_BMI2) != bit_BMI2) + return false; + + // Both AVX512F (%ebx) and AVX512 VPOPCNTDQ (%ecx) must be reported + return ((abcd[1] & bit_AVX512F) == bit_AVX512F && + (abcd[2] & bit_AVX512_VPOPCNTDQ) == bit_AVX512_VPOPCNTDQ); +} + +} // namespace