From 63985b96001b124ba495c2d58280164e397d0a73 Mon Sep 17 00:00:00 2001 From: nihui Date: Mon, 31 Jul 2023 15:21:29 +0800 Subject: [PATCH] detect msvc avx512bf16 avx512fp16 --- CMakeLists.txt | 43 ++++++++++++++++++++++++++------------ cmake/ncnn_add_layer.cmake | 6 ++++++ 2 files changed, 36 insertions(+), 13 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7647a67b2c8..839518eb558 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -404,27 +404,44 @@ else() if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC" OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC")) check_cxx_compiler_flag("/arch:AVX" NCNN_COMPILER_SUPPORT_X86_AVX) - check_cxx_compiler_flag("/arch:AVX" NCNN_COMPILER_SUPPORT_X86_FMA) - check_cxx_compiler_flag("/arch:AVX" NCNN_COMPILER_SUPPORT_X86_XOP) - check_cxx_compiler_flag("/arch:AVX" NCNN_COMPILER_SUPPORT_X86_F16C) + + set(CMAKE_REQUIRED_FLAGS "/arch:AVX") + check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m256 _s, _a, _b; _s = _mm256_fmadd_ps(_a, _b, _s); return 0; }" NCNN_COMPILER_SUPPORT_X86_FMA) + + set(CMAKE_REQUIRED_FLAGS "/arch:AVX") + check_cxx_source_compiles("#ifdef _MSC_VER\n#include <ammintrin.h>\n#else\n#include <x86intrin.h>\n#endif\nint main() { __m128i _s, _a, _b; _s = _mm_maddd_epi16(_a, _b, _s); return 0; }" NCNN_COMPILER_SUPPORT_X86_XOP) + + set(CMAKE_REQUIRED_FLAGS "/arch:AVX") + check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m256 _a; _a = _mm256_cvtph_ps(_mm256_cvtps_ph(_a, _MM_ROUND_NEAREST | _MM_FROUND_NO_EXC)); return 0; }" NCNN_COMPILER_SUPPORT_X86_F16C) + check_cxx_compiler_flag("/arch:AVX2" NCNN_COMPILER_SUPPORT_X86_AVX2) - check_cxx_compiler_flag("/arch:AVX2" NCNN_COMPILER_SUPPORT_X86_AVX_VNNI) check_cxx_compiler_flag("/arch:AVX512" NCNN_COMPILER_SUPPORT_X86_AVX512) - check_cxx_compiler_flag("/arch:AVX512" NCNN_COMPILER_SUPPORT_X86_AVX512_VNNI) - if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 19.16) - # vs2017+ supports avx512 and vnni - 
set(NCNN_COMPILER_SUPPORT_X86_AVX_VNNI OFF) - set(NCNN_COMPILER_SUPPORT_X86_AVX512 OFF) - set(NCNN_COMPILER_SUPPORT_X86_AVX512_VNNI OFF) - endif() + + set(CMAKE_REQUIRED_FLAGS "/arch:AVX2") + check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m256i _s, _a, _b; _s = _mm256_dpwssd_epi32(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX_VNNI) + + set(CMAKE_REQUIRED_FLAGS "/arch:AVX512") + check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m512i _s, _a, _b; _s = _mm512_dpwssd_epi32(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX512_VNNI) + + set(CMAKE_REQUIRED_FLAGS "/arch:AVX512") + check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m256bh _s; __m512bh _a, _b; _s = _mm512_cvtneps_pbh(_mm512_dpbf16_ps(_mm512_cvtpbh_ps(_s), _a, _b)); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX512_BF16) + + set(CMAKE_REQUIRED_FLAGS "/arch:AVX512") + check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m512h _s, _a, _b; _s = _mm512_fmadd_ph(_s, _a, _b); __m512 _s2; _s2 = _mm512_cvtxph_ps(_mm512_cvtxps_ph(_s2)); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX512_FP16) + + unset(CMAKE_REQUIRED_FLAGS) else() check_cxx_compiler_flag("-mavx" NCNN_COMPILER_SUPPORT_X86_AVX) set(CMAKE_REQUIRED_FLAGS "-mfma -mf16c") check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m256 _s, _a, _b; _s = _mm256_fmadd_ps(_a, _b, _s); return 0; }" NCNN_COMPILER_SUPPORT_X86_FMA) - check_cxx_compiler_flag("-mxop" NCNN_COMPILER_SUPPORT_X86_XOP) - check_cxx_compiler_flag("-mf16c" NCNN_COMPILER_SUPPORT_X86_F16C) + set(CMAKE_REQUIRED_FLAGS "-mxop") + check_cxx_source_compiles("#ifdef _MSC_VER\n#include <ammintrin.h>\n#else\n#include <x86intrin.h>\n#endif\nint main() { __m128i _s, _a, _b; _s = _mm_maddd_epi16(_a, _b, _s); return 0; }" NCNN_COMPILER_SUPPORT_X86_XOP) + + set(CMAKE_REQUIRED_FLAGS "-mf16c") + check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m256 _a; _a = _mm256_cvtph_ps(_mm256_cvtps_ph(_a, _MM_ROUND_NEAREST | _MM_FROUND_NO_EXC)); return 0; }" NCNN_COMPILER_SUPPORT_X86_F16C) + + check_cxx_compiler_flag("-mfma -mf16c 
-mavx2" NCNN_COMPILER_SUPPORT_X86_AVX2) check_cxx_compiler_flag("-mfma -mf16c -mavx512f -mavx512cd -mavx512bw -mavx512dq -mavx512vl" NCNN_COMPILER_SUPPORT_X86_AVX512) diff --git a/cmake/ncnn_add_layer.cmake b/cmake/ncnn_add_layer.cmake index 89220de134e..c2a21729026 100644 --- a/cmake/ncnn_add_layer.cmake +++ b/cmake/ncnn_add_layer.cmake @@ -171,6 +171,12 @@ macro(ncnn_add_layer class) if(NCNN_AVX512VNNI) ncnn_add_arch_opt_source(${class} avx512vnni "/arch:AVX512 /D__SSE4_1__ /D__FMA__ /D__F16C__ /D__AVX512VNNI__") endif() + if(NCNN_AVX512BF16) + ncnn_add_arch_opt_source(${class} avx512bf16 "/arch:AVX512 /D__SSE4_1__ /D__FMA__ /D__F16C__ /D__AVX512BF16__") + endif() + if(NCNN_AVX512FP16) + ncnn_add_arch_opt_source(${class} avx512fp16 "/arch:AVX512 /D__SSE4_1__ /D__FMA__ /D__F16C__ /D__AVX512FP16__") + endif() if(NCNN_AVXVNNI) ncnn_add_arch_opt_source(${class} avxvnni "/arch:AVX2 /D__SSE4_1__ /D__FMA__ /D__F16C__ /D__AVXVNNI__") endif()