diff --git a/cmake/ncnn_add_layer.cmake b/cmake/ncnn_add_layer.cmake index 0b0fb323333..7f334fb0b68 100644 --- a/cmake/ncnn_add_layer.cmake +++ b/cmake/ncnn_add_layer.cmake @@ -133,104 +133,104 @@ macro(ncnn_add_layer class) set(layer_registry_vulkan "${layer_registry_vulkan}#if NCNN_STRING\n{\"${class}\", 0},\n#else\n{0},\n#endif\n") endif() - if(NCNN_RUNTIME_CPU AND NCNN_TARGET_ARCH STREQUAL "x86") + if(NCNN_TARGET_ARCH STREQUAL "x86") if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC") - if(NCNN_AVX512) + if(NCNN_RUNTIME_CPU AND NCNN_AVX512) ncnn_add_arch_opt_layer(${class} avx512 "/arch:AVX512 /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__") endif() - if(NCNN_FMA) + if(NCNN_RUNTIME_CPU AND NCNN_FMA) ncnn_add_arch_opt_layer(${class} fma "/arch:AVX /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__") endif() - if(NCNN_AVX) + if(NCNN_RUNTIME_CPU AND NCNN_AVX) ncnn_add_arch_opt_layer(${class} avx "/arch:AVX /D__SSSE3__ /D__SSE4_1__") endif() - if(NCNN_AVX512VNNI) + if(NCNN_RUNTIME_CPU AND NCNN_AVX512VNNI) ncnn_add_arch_opt_source(${class} avx512vnni "/arch:AVX512 /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__ /D__AVX512VNNI__") endif() - if(NCNN_AVX512BF16) + if(NCNN_RUNTIME_CPU AND NCNN_AVX512BF16) ncnn_add_arch_opt_source(${class} avx512bf16 "/arch:AVX512 /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__ /D__AVX512BF16__") endif() - if(NCNN_AVX512FP16) + if(NCNN_RUNTIME_CPU AND NCNN_AVX512FP16) ncnn_add_arch_opt_source(${class} avx512fp16 "/arch:AVX512 /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__ /D__AVX512FP16__") endif() - if(NCNN_AVXVNNI) + if(NCNN_RUNTIME_CPU AND NCNN_AVXVNNI) ncnn_add_arch_opt_source(${class} avxvnni "/arch:AVX2 /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__ /D__AVXVNNI__") endif() - if(NCNN_AVX2) + if(NCNN_RUNTIME_CPU AND NCNN_AVX2) ncnn_add_arch_opt_source(${class} avx2 "/arch:AVX2 /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__") endif() - if(NCNN_XOP) + if(NCNN_RUNTIME_CPU AND NCNN_XOP) ncnn_add_arch_opt_source(${class} xop "/arch:AVX /D__SSSE3__ /D__SSE4_1__ /D__XOP__") endif() - if(NCNN_F16C) + if(NCNN_RUNTIME_CPU AND NCNN_F16C) ncnn_add_arch_opt_source(${class} f16c "/arch:AVX /D__SSSE3__ /D__SSE4_1__ /D__F16C__") endif() elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC") - if(NCNN_AVX512) + if(NCNN_RUNTIME_CPU AND NCNN_AVX512) ncnn_add_arch_opt_layer(${class} avx512 "/arch:AVX512 -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mfma -mf16c /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__") endif() - if(NCNN_FMA) + if(NCNN_RUNTIME_CPU AND NCNN_FMA) ncnn_add_arch_opt_layer(${class} fma "/arch:AVX -mfma -mf16c /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__") endif() - if(NCNN_AVX) + if(NCNN_RUNTIME_CPU AND NCNN_AVX) ncnn_add_arch_opt_layer(${class} avx "/arch:AVX /D__SSSE3__ /D__SSE4_1__") endif() - if(NCNN_AVX512VNNI) + if(NCNN_RUNTIME_CPU AND NCNN_AVX512VNNI) ncnn_add_arch_opt_source(${class} avx512vnni "/arch:AVX512 -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mfma -mf16c -mavx512vnni /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__ /D__AVX512VNNI__") endif() - if(NCNN_AVX512BF16) + if(NCNN_RUNTIME_CPU AND NCNN_AVX512BF16) ncnn_add_arch_opt_source(${class} avx512bf16 "/arch:AVX512 -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mfma -mf16c -mavx512bf16 /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__ /D__AVX512BF16__") endif() - if(NCNN_AVX512FP16) + if(NCNN_RUNTIME_CPU AND NCNN_AVX512FP16) ncnn_add_arch_opt_source(${class} avx512fp16 "/arch:AVX512 -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mfma -mf16c -mavx512fp16 /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__ /D__AVX512FP16__") endif() - if(NCNN_AVXVNNI) + if(NCNN_RUNTIME_CPU AND NCNN_AVXVNNI) ncnn_add_arch_opt_source(${class} avxvnni "/arch:AVX2 -mfma -mf16c -mavxvnni /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__ /D__AVXVNNI__") endif() - if(NCNN_AVX2) + if(NCNN_RUNTIME_CPU AND NCNN_AVX2) ncnn_add_arch_opt_source(${class} avx2 "/arch:AVX2 -mfma -mf16c /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__") endif() - if(NCNN_XOP) + if(NCNN_RUNTIME_CPU AND NCNN_XOP) ncnn_add_arch_opt_source(${class} xop "/arch:AVX -mxop /D__SSSE3__ /D__SSE4_1__ /D__XOP__") endif() - if(NCNN_F16C) + if(NCNN_RUNTIME_CPU AND NCNN_F16C) ncnn_add_arch_opt_source(${class} f16c "/arch:AVX -mf16c /D__SSSE3__ /D__SSE4_1__ /D__F16C__") endif() else() - if(NCNN_AVX512) + if(NCNN_RUNTIME_CPU AND NCNN_AVX512) ncnn_add_arch_opt_layer(${class} avx512 "-mavx512f -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mfma -mf16c") endif() - if(NCNN_FMA) + if(NCNN_RUNTIME_CPU AND NCNN_FMA) ncnn_add_arch_opt_layer(${class} fma "-mavx -mfma -mf16c") endif() - if(NCNN_AVX) + if(NCNN_RUNTIME_CPU AND NCNN_AVX) ncnn_add_arch_opt_layer(${class} avx "-mavx") endif() - if(NCNN_AVX512VNNI) + if(NCNN_RUNTIME_CPU AND NCNN_AVX512VNNI) ncnn_add_arch_opt_source(${class} avx512vnni "-mavx512f -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mfma -mf16c -mavx512vnni") endif() - if(NCNN_AVX512BF16) + if(NCNN_RUNTIME_CPU AND NCNN_AVX512BF16) ncnn_add_arch_opt_source(${class} avx512bf16 "-mavx512f -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mfma -mf16c -mavx512bf16") endif() - if(NCNN_AVX512FP16) + if(NCNN_RUNTIME_CPU AND NCNN_AVX512FP16) ncnn_add_arch_opt_source(${class} avx512fp16 "-mavx512f -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mfma -mf16c -mavx512fp16") endif() - if(NCNN_AVXVNNI) + if(NCNN_RUNTIME_CPU AND NCNN_AVXVNNI) ncnn_add_arch_opt_source(${class} avxvnni "-mavx2 -mfma -mf16c -mavxvnni") endif() - if(NCNN_AVX2) + if(NCNN_RUNTIME_CPU AND NCNN_AVX2) ncnn_add_arch_opt_source(${class} avx2 "-mavx2 -mfma -mf16c") endif() - if(NCNN_XOP) + if(NCNN_RUNTIME_CPU AND NCNN_XOP) ncnn_add_arch_opt_source(${class} xop "-mavx -mxop") endif() - if(NCNN_F16C) + if(NCNN_RUNTIME_CPU AND NCNN_F16C) ncnn_add_arch_opt_source(${class} f16c "-mavx -mf16c") endif() endif() endif() - if(NCNN_RUNTIME_CPU AND NCNN_TARGET_ARCH STREQUAL "arm" AND (CMAKE_SIZEOF_VOID_P EQUAL 4 AND NOT NCNN_TARGET_ILP32)) + if(NCNN_TARGET_ARCH STREQUAL "arm" AND (CMAKE_SIZEOF_VOID_P EQUAL 4 AND NOT NCNN_TARGET_ILP32)) if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC" OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC")) if(NCNN_VFPV4) ncnn_add_arch_opt_source(${class} vfpv4 "/arch:VFPv4 /D__ARM_FP=0x0E") @@ -246,7 +246,7 @@ macro(ncnn_add_layer class) endif() endif() - if(NCNN_RUNTIME_CPU AND NCNN_TARGET_ARCH STREQUAL "arm" AND (CMAKE_SIZEOF_VOID_P EQUAL 8 OR NCNN_TARGET_ILP32)) + if(NCNN_TARGET_ARCH STREQUAL "arm" AND (CMAKE_SIZEOF_VOID_P EQUAL 8 OR NCNN_TARGET_ILP32)) if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC") if(NCNN_VFPV4) ncnn_add_arch_opt_source(${class} vfpv4 " ") @@ -254,28 +254,28 @@ macro(ncnn_add_layer class) if(NCNN_ARM82) ncnn_add_arch_opt_source(${class} asimdhp "/arch:armv8.2 /D__ARM_FEATURE_FP16_VECTOR_ARITHMETIC") endif() - if(NCNN_ARM82DOT) + if(NCNN_RUNTIME_CPU AND NCNN_ARM82DOT) ncnn_add_arch_opt_source(${class} asimddp "/arch:armv8.2 /D__ARM_FEATURE_FP16_VECTOR_ARITHMETIC /D__ARM_FEATURE_DOTPROD") endif() - if(NCNN_ARM82FP16FML) + if(NCNN_RUNTIME_CPU AND NCNN_ARM82FP16FML) ncnn_add_arch_opt_source(${class} asimdfhm "/arch:armv8.2 /D__ARM_FEATURE_FP16_VECTOR_ARITHMETIC /D__ARM_FEATURE_FP16_FML") endif() - if(NCNN_ARM84BF16) + if(NCNN_RUNTIME_CPU AND NCNN_ARM84BF16) ncnn_add_arch_opt_source(${class} bf16 "/arch:armv8.4 /D__ARM_FEATURE_FP16_VECTOR_ARITHMETIC /D__ARM_FEATURE_DOTPROD /D__ARM_FEATURE_FP16_FML /D__ARM_FEATURE_BF16_VECTOR_ARITHMETIC") endif() - if(NCNN_ARM84I8MM) + if(NCNN_RUNTIME_CPU AND NCNN_ARM84I8MM) ncnn_add_arch_opt_source(${class} i8mm "/arch:armv8.4 /D__ARM_FEATURE_FP16_VECTOR_ARITHMETIC /D__ARM_FEATURE_DOTPROD /D__ARM_FEATURE_FP16_FML /D__ARM_FEATURE_MATMUL_INT8") endif() # TODO add support for sve family - if(NCNN_ARM86SVE) + if(NCNN_RUNTIME_CPU AND NCNN_ARM86SVE) endif() - if(NCNN_ARM86SVE2) + if(NCNN_RUNTIME_CPU AND NCNN_ARM86SVE2) endif() - if(NCNN_ARM86SVEBF16) + if(NCNN_RUNTIME_CPU AND NCNN_ARM86SVEBF16) endif() - if(NCNN_ARM86SVEI8MM) + if(NCNN_RUNTIME_CPU AND NCNN_ARM86SVEI8MM) endif() - if(NCNN_ARM86SVEF32MM) + if(NCNN_RUNTIME_CPU AND NCNN_ARM86SVEF32MM) endif() elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC") if(NCNN_VFPV4) @@ -284,28 +284,28 @@ macro(ncnn_add_layer class) if(NCNN_ARM82) ncnn_add_arch_opt_source(${class} asimdhp "/arch:armv8.2 -march=armv8.2-a+fp16 /D__ARM_FEATURE_FP16_VECTOR_ARITHMETIC") endif() - if(NCNN_ARM82DOT) + if(NCNN_RUNTIME_CPU AND NCNN_ARM82DOT) ncnn_add_arch_opt_source(${class} asimddp "/arch:armv8.2 -march=armv8.2-a+fp16+dotprod /D__ARM_FEATURE_FP16_VECTOR_ARITHMETIC /D__ARM_FEATURE_DOTPROD") endif() - if(NCNN_ARM82FP16FML) + if(NCNN_RUNTIME_CPU AND NCNN_ARM82FP16FML) ncnn_add_arch_opt_source(${class} asimdfhm "/arch:armv8.2 -march=armv8.2-a+fp16+fp16fml /D__ARM_FEATURE_FP16_VECTOR_ARITHMETIC /D__ARM_FEATURE_FP16_FML") endif() - if(NCNN_ARM84BF16) + if(NCNN_RUNTIME_CPU AND NCNN_ARM84BF16) ncnn_add_arch_opt_source(${class} bf16 "/arch:armv8.4 -march=armv8.4-a+fp16+dotprod+bf16 /D__ARM_FEATURE_FP16_VECTOR_ARITHMETIC /D__ARM_FEATURE_DOTPROD /D__ARM_FEATURE_FP16_FML /D__ARM_FEATURE_BF16_VECTOR_ARITHMETIC") endif() - if(NCNN_ARM84I8MM) + if(NCNN_RUNTIME_CPU AND NCNN_ARM84I8MM) ncnn_add_arch_opt_source(${class} i8mm "/arch:armv8.4 -march=armv8.4-a+fp16+dotprod+i8mm /D__ARM_FEATURE_FP16_VECTOR_ARITHMETIC /D__ARM_FEATURE_DOTPROD /D__ARM_FEATURE_FP16_FML /D__ARM_FEATURE_MATMUL_INT8") endif() # TODO add support for sve family - if(NCNN_ARM86SVE) + if(NCNN_RUNTIME_CPU AND NCNN_ARM86SVE) endif() - if(NCNN_ARM86SVE2) + if(NCNN_RUNTIME_CPU AND NCNN_ARM86SVE2) endif() - if(NCNN_ARM86SVEBF16) + if(NCNN_RUNTIME_CPU AND NCNN_ARM86SVEBF16) endif() - if(NCNN_ARM86SVEI8MM) + if(NCNN_RUNTIME_CPU AND NCNN_ARM86SVEI8MM) endif() - if(NCNN_ARM86SVEF32MM) + if(NCNN_RUNTIME_CPU AND NCNN_ARM86SVEF32MM) endif() else() if(NCNN_VFPV4) @@ -314,38 +314,38 @@ macro(ncnn_add_layer class) if(NCNN_ARM82) ncnn_add_arch_opt_source(${class} asimdhp "-march=armv8.2-a+fp16") endif() - if(NCNN_ARM82DOT) + if(NCNN_RUNTIME_CPU AND NCNN_ARM82DOT) ncnn_add_arch_opt_source(${class} asimddp "-march=armv8.2-a+fp16+dotprod") endif() - if(NCNN_ARM82FP16FML) + if(NCNN_RUNTIME_CPU AND NCNN_ARM82FP16FML) ncnn_add_arch_opt_source(${class} asimdfhm "-march=armv8.2-a+fp16+fp16fml") endif() - if(NCNN_ARM84BF16) + if(NCNN_RUNTIME_CPU AND NCNN_ARM84BF16) ncnn_add_arch_opt_source(${class} bf16 "-march=armv8.4-a+fp16+dotprod+bf16") endif() - if(NCNN_ARM84I8MM) + if(NCNN_RUNTIME_CPU AND NCNN_ARM84I8MM) ncnn_add_arch_opt_source(${class} i8mm "-march=armv8.4-a+fp16+dotprod+i8mm") endif() - if(NCNN_ARM86SVE) + if(NCNN_RUNTIME_CPU AND NCNN_ARM86SVE) ncnn_add_arch_opt_source(${class} sve "-march=armv8.6-a+fp16+dotprod+sve") endif() - if(NCNN_ARM86SVE2) + if(NCNN_RUNTIME_CPU AND NCNN_ARM86SVE2) ncnn_add_arch_opt_source(${class} sve2 "-march=armv8.6-a+fp16+dotprod+sve2") endif() - if(NCNN_ARM86SVEBF16) + if(NCNN_RUNTIME_CPU AND NCNN_ARM86SVEBF16) ncnn_add_arch_opt_source(${class} svebf16 "-march=armv8.6-a+fp16+dotprod+sve+bf16") endif() - if(NCNN_ARM86SVEI8MM) + if(NCNN_RUNTIME_CPU AND NCNN_ARM86SVEI8MM) ncnn_add_arch_opt_source(${class} svei8mm "-march=armv8.6-a+fp16+dotprod+sve+i8mm") endif() - if(NCNN_ARM86SVEF32MM) + if(NCNN_RUNTIME_CPU AND NCNN_ARM86SVEF32MM) ncnn_add_arch_opt_source(${class} svef32mm "-march=armv8.6-a+fp16+dotprod+sve+f32mm") endif() endif() endif() - if(NCNN_RUNTIME_CPU AND NCNN_TARGET_ARCH STREQUAL "mips") - if(NCNN_MSA) + if(NCNN_TARGET_ARCH STREQUAL "mips") + if(NCNN_RUNTIME_CPU AND NCNN_MSA) ncnn_add_arch_opt_layer(${class} msa "-mmsa") endif() if(NCNN_MMI) @@ -353,17 +353,17 @@ macro(ncnn_add_layer class) endif() endif() - if(NCNN_RUNTIME_CPU AND NCNN_TARGET_ARCH STREQUAL "loongarch") - if(NCNN_LASX) + if(NCNN_TARGET_ARCH STREQUAL "loongarch") + if(NCNN_RUNTIME_CPU AND NCNN_LASX) ncnn_add_arch_opt_layer(${class} lasx "-mlasx -mlsx") endif() - if(NCNN_LSX) + if(NCNN_RUNTIME_CPU AND NCNN_LSX) ncnn_add_arch_opt_layer(${class} lsx "-mlsx") endif() endif() - if(NCNN_RUNTIME_CPU AND NCNN_TARGET_ARCH STREQUAL "riscv" AND CMAKE_SIZEOF_VOID_P EQUAL 8) - if(NCNN_RVV) + if(NCNN_TARGET_ARCH STREQUAL "riscv" AND CMAKE_SIZEOF_VOID_P EQUAL 8) + if(NCNN_RUNTIME_CPU AND NCNN_RVV) if(NCNN_COMPILER_SUPPORT_RVV_ZFH) ncnn_add_arch_opt_layer(${class} rvv "-march=rv64gcv_zfh") elseif(NCNN_COMPILER_SUPPORT_RVV_ZVFH)