Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

windows clang ci #5469

Merged
merged 16 commits into from
May 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 85 additions & 0 deletions .github/workflows/windows-clang.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
name: windows-clang
on:
push:
branches: [master]
paths:
- '.github/workflows/windows-clang.yml'
- 'CMakeLists.txt'
- 'cmake/**'
- 'src/*'
- 'src/layer/*'
- 'src/layer/arm/**'
- 'src/layer/x86/**'
- 'src/layer/vulkan/**'
- 'tests/**'
pull_request:
branches: [master]
paths:
- '.github/workflows/windows-clang.yml'
- 'CMakeLists.txt'
- 'cmake/**'
- 'src/*'
- 'src/layer/*'
- 'src/layer/arm/**'
- 'src/layer/x86/**'
- 'src/layer/vulkan/**'
- 'tests/**'
concurrency:
group: windows-clang-${{ github.ref }}
cancel-in-progress: true
permissions:
contents: read

jobs:
windows:
name: ClangCL
runs-on: windows-2022

env:
UseMultiToolTask: true
NCNN_CMAKE_OPTIONS: -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF

steps:
- uses: actions/checkout@v4
with:
submodules: true

- name: arm64
run: |
mkdir build-arm64; cd build-arm64
cmake -T ClangCL -A arm64 ${{ env.NCNN_CMAKE_OPTIONS }} -DNCNN_VULKAN=OFF ..
cmake --build . --config Release -j 4

- name: arm64-vulkan
run: |
mkdir build-arm64-vulkan; cd build-arm64-vulkan
cmake -T ClangCL -A arm64 ${{ env.NCNN_CMAKE_OPTIONS }} -DNCNN_VULKAN=ON -DNCNN_SHARED_LIB=ON ..
cmake --build . --config Release -j 4

- name: x86
run: |
mkdir build-x86; cd build-x86
cmake -T ClangCL -A Win32 ${{ env.NCNN_CMAKE_OPTIONS }} -DNCNN_BUILD_TESTS=ON -DNCNN_VULKAN=OFF ..
cmake --build . --config Release -j 4
- name: x86-test
run: cd build-x86; ctest -C Release --output-on-failure -j 4

- name: x86-vulkan
run: |
mkdir build-x86-vulkan; cd build-x86-vulkan
cmake -T ClangCL -A Win32 ${{ env.NCNN_CMAKE_OPTIONS }} -DNCNN_VULKAN=ON -DNCNN_SHARED_LIB=ON ..
cmake --build . --config Release -j 4

- name: x64
run: |
mkdir build-x64; cd build-x64
cmake -T ClangCL -A x64 ${{ env.NCNN_CMAKE_OPTIONS }} -DNCNN_BUILD_TESTS=ON -DNCNN_VULKAN=OFF ..
cmake --build . --config Release -j 4
- name: x64-test
run: cd build-x64; ctest -C Release --output-on-failure -j 4

- name: x64-vulkan
run: |
mkdir build-x64-vulkan; cd build-x64-vulkan
cmake -T ClangCL -A x64 ${{ env.NCNN_CMAKE_OPTIONS }} -DNCNN_VULKAN=ON -DNCNN_SHARED_LIB=ON ..
cmake --build . --config Release -j 4
79 changes: 72 additions & 7 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -134,11 +134,15 @@ endif()
##############################################

include(CheckCXXCompilerFlag)
check_cxx_source_compiles("int main() { int a = 0; asm volatile(\"\" : \"=r\"(a) : \"0\"(a) : \"memory\"); return 0; }" NCNN_COMPILER_SUPPORT_GNU_INLINE_ASM)
if(NCNN_COMPILER_SUPPORT_GNU_INLINE_ASM)
option(NCNN_GNU_INLINE_ASM "optimize platform with gnu style inline assembly" ON)
else()
message(WARNING "The compiler does not support gnu style inline assembly. NCNN_GNU_INLINE_ASM will be OFF.")

# gnu inline assembly in clang msvc does not work actually
if(NOT (CMAKE_CXX_COMPILER_ID MATCHES "MSVC" OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC")))
check_cxx_source_compiles("int main() { int a = 0; asm volatile(\"\" : \"=r\"(a) : \"0\"(a) : \"memory\"); return 0; }" NCNN_COMPILER_SUPPORT_GNU_INLINE_ASM)
if(NCNN_COMPILER_SUPPORT_GNU_INLINE_ASM)
option(NCNN_GNU_INLINE_ASM "optimize platform with gnu style inline assembly" ON)
else()
message(WARNING "The compiler does not support gnu style inline assembly. NCNN_GNU_INLINE_ASM will be OFF.")
endif()
endif()

if((IOS AND CMAKE_OSX_ARCHITECTURES MATCHES "arm")
Expand Down Expand Up @@ -178,7 +182,7 @@ if((IOS AND CMAKE_OSX_ARCHITECTURES MATCHES "arm")
endif()

if(CMAKE_SIZEOF_VOID_P EQUAL 8 OR NCNN_TARGET_ILP32)
if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC" OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC"))
if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
set(CMAKE_REQUIRED_FLAGS "/arch:armv8.0")
check_cxx_source_compiles("#include <arm_neon.h>\nint main() { float32x4_t _a; float16x4_t _s = vcvt_f16_f32(_a); return 0; }" NCNN_COMPILER_SUPPORT_ARM_VFPV4)

Expand Down Expand Up @@ -212,6 +216,41 @@ if((IOS AND CMAKE_OSX_ARCHITECTURES MATCHES "arm")
set(CMAKE_REQUIRED_FLAGS "/arch:armv8.6")
check_cxx_source_compiles("#include <arm_sve.h>\nint main() { svfloat32_t _s, _a, _b; _s = svmmla_f32(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_ARM86_SVEF32MM)

unset(CMAKE_REQUIRED_FLAGS)
elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC")
set(CMAKE_REQUIRED_FLAGS "/arch:armv8.0")
check_cxx_source_compiles("#include <arm_neon.h>\nint main() { float32x4_t _a; float16x4_t _s = vcvt_f16_f32(_a); return 0; }" NCNN_COMPILER_SUPPORT_ARM_VFPV4)

set(CMAKE_REQUIRED_FLAGS "/arch:armv8.2 -march=armv8.2-a+fp16")
check_cxx_source_compiles("#include <arm_neon.h>\nint main() { float16x8_t _s, _a, _b; _s = vfmaq_f16(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_ARM82_FP16)

set(CMAKE_REQUIRED_FLAGS "/arch:armv8.2 -march=armv8.2-a+dotprod")
check_cxx_source_compiles("#include <arm_neon.h>\nint main() { int32x4_t _s; int8x16_t _a, _b; _s = vdotq_s32(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_ARM82_DOTPROD)

set(CMAKE_REQUIRED_FLAGS "/arch:armv8.2 -march=armv8.2-a+fp16fml")
check_cxx_source_compiles("#include <arm_neon.h>\nint main() { float32x4_t _s; float16x8_t _a, _b; _s = vfmlalq_low_f16(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_ARM82_FP16FML)

set(CMAKE_REQUIRED_FLAGS "/arch:armv8.4 -march=armv8.4-a+bf16")
check_cxx_source_compiles("#include <arm_neon.h>\nint main() { float32x4_t _s; bfloat16x8_t _a, _b; _s = vcvt_f32_bf16(vcvt_bf16_f32(vbfmmlaq_f32(_s, _a, _b))); return 0; }" NCNN_COMPILER_SUPPORT_ARM84_BF16)

set(CMAKE_REQUIRED_FLAGS "/arch:armv8.4 -march=armv8.4-a+i8mm")
check_cxx_source_compiles("#include <arm_neon.h>\nint main() { int32x4_t _s; int8x16_t _a, _b; _s = vmmlaq_s32(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_ARM84_I8MM)

set(CMAKE_REQUIRED_FLAGS "/arch:armv8.6 -march=armv8.6-a+sve")
check_cxx_source_compiles("#include <arm_sve.h>\nint main() { svfloat16_t _s, _a, _b; svbool_t bp; _s = svmla_f16_z(bp, _s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_ARM86_SVE)

set(CMAKE_REQUIRED_FLAGS "/arch:armv8.6 -march=armv8.6-a+sve2")
check_cxx_source_compiles("#include <arm_sve.h>\nint main() { svint16_t _s; svint8_t _a, _b; _s = svmlslb_s16(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_ARM86_SVE2)

set(CMAKE_REQUIRED_FLAGS "/arch:armv8.6 -march=armv8.6-a+sve+bf16")
check_cxx_source_compiles("#include <arm_sve.h>\nint main() { svfloat32_t _s; svbfloat16_t _a, _b; _s = svbfmmla_f32(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_ARM86_SVEBF16)

set(CMAKE_REQUIRED_FLAGS "/arch:armv8.6 -march=armv8.6-a+sve+i8mm")
check_cxx_source_compiles("#include <arm_sve.h>\nint main() { svint32_t _s; svint8_t _a, _b; _s = svmmla_s32(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_ARM86_SVEI8MM)

set(CMAKE_REQUIRED_FLAGS "/arch:armv8.6 -march=armv8.6-a+sve+f32mm")
check_cxx_source_compiles("#include <arm_sve.h>\nint main() { svfloat32_t _s, _a, _b; _s = svmmla_f32(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_ARM86_SVEF32MM)

unset(CMAKE_REQUIRED_FLAGS)
else()
set(CMAKE_REQUIRED_FLAGS "-march=armv8-a")
Expand Down Expand Up @@ -447,7 +486,7 @@ else()

option(NCNN_SSE2 "optimize x86 platform with sse2 extension" ON)

if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC" OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC"))
if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
check_cxx_compiler_flag("/arch:AVX" NCNN_COMPILER_SUPPORT_X86_AVX)
check_cxx_compiler_flag("/arch:AVX" NCNN_COMPILER_SUPPORT_X86_FMA)
check_cxx_compiler_flag("/arch:AVX" NCNN_COMPILER_SUPPORT_X86_XOP)
Expand All @@ -467,6 +506,32 @@ else()
set(CMAKE_REQUIRED_FLAGS "/arch:AVX512")
check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m512h _s, _a, _b; _s = _mm512_fmadd_ph(_s, _a, _b); __m512 _s2; _s2 = _mm512_cvtxph_ps(_mm512_cvtxps_ph(_s2)); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX512_FP16)

unset(CMAKE_REQUIRED_FLAGS)
elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC")
check_cxx_compiler_flag("/arch:AVX" NCNN_COMPILER_SUPPORT_X86_AVX)

set(CMAKE_REQUIRED_FLAGS "/arch:AVX -mfma -mf16c")
check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m256 _s, _a, _b; _s = _mm256_fmadd_ps(_a, _b, _s); return 0; }" NCNN_COMPILER_SUPPORT_X86_FMA)

set(CMAKE_REQUIRED_FLAGS "/arch:AVX -mxop")
check_cxx_source_compiles("#include <x86intrin.h>\nint main() { __m128 _s, _a, _b; _s = _mm_maddd_epi16(_a, _b, _s); return 0; }" NCNN_COMPILER_SUPPORT_X86_XOP)

check_cxx_compiler_flag("/arch:AVX -mf16c" NCNN_COMPILER_SUPPORT_X86_F16C)
check_cxx_compiler_flag("/arch:AVX2 -mfma -mf16c" NCNN_COMPILER_SUPPORT_X86_AVX2)
check_cxx_compiler_flag("/arch:AVX512 -mfma -mf16c -mavx512cd -mavx512bw -mavx512dq -mavx512vl" NCNN_COMPILER_SUPPORT_X86_AVX512)

set(CMAKE_REQUIRED_FLAGS "/arch:AVX2 -mfma -mf16c -mavxvnni")
check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m256i _s, _a, _b; _s = _mm256_dpwssd_epi32(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX_VNNI)

set(CMAKE_REQUIRED_FLAGS "/arch:AVX512 -mfma -mf16c -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mavx512vnni")
check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m512i _s, _a, _b; _s = _mm512_dpwssd_epi32(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX512_VNNI)

set(CMAKE_REQUIRED_FLAGS "/arch:AVX512 -mfma -mf16c -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mavx512bf16")
check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m256bh _s; __m512bh _a, _b; _s = _mm512_cvtneps_pbh(_mm512_dpbf16_ps(_mm512_cvtpbh_ps(_s), _a, _b)); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX512_BF16)

set(CMAKE_REQUIRED_FLAGS "/arch:AVX512 -mfma -mf16c -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mavx512fp16")
check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m512h _s, _a, _b; _s = _mm512_fmadd_ph(_s, _a, _b); __m512 _s2; _s2 = _mm512_cvtxph_ps(_mm512_cvtxps_ph(_s2)); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX512_FP16)

unset(CMAKE_REQUIRED_FLAGS)
else()
check_cxx_compiler_flag("-mavx" NCNN_COMPILER_SUPPORT_X86_AVX)
Expand Down
65 changes: 63 additions & 2 deletions cmake/ncnn_add_layer.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ macro(ncnn_add_layer class)
endif()

if(NCNN_TARGET_ARCH STREQUAL "x86")
if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC" OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC"))
if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
if(NCNN_RUNTIME_CPU AND NCNN_AVX512)
ncnn_add_arch_opt_layer(${class} avx512 "/arch:AVX512 /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__")
endif()
Expand Down Expand Up @@ -165,6 +165,37 @@ macro(ncnn_add_layer class)
if(NCNN_F16C)
ncnn_add_arch_opt_source(${class} f16c "/arch:AVX /D__SSSE3__ /D__SSE4_1__ /D__F16C__")
endif()
elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC")
if(NCNN_RUNTIME_CPU AND NCNN_AVX512)
ncnn_add_arch_opt_layer(${class} avx512 "/arch:AVX512 -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mfma -mf16c /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__")
endif()
if(NCNN_RUNTIME_CPU AND NCNN_FMA)
ncnn_add_arch_opt_layer(${class} fma "/arch:AVX -mfma -mf16c /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__")
endif()
if(NCNN_RUNTIME_CPU AND NCNN_AVX)
ncnn_add_arch_opt_layer(${class} avx "/arch:AVX /D__SSSE3__ /D__SSE4_1__")
endif()
if(NCNN_AVX512VNNI)
ncnn_add_arch_opt_source(${class} avx512vnni "/arch:AVX512 -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mfma -mf16c -mavx512vnni /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__ /D__AVX512VNNI__")
endif()
if(NCNN_AVX512BF16)
ncnn_add_arch_opt_source(${class} avx512bf16 "/arch:AVX512 -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mfma -mf16c -mavx512bf16 /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__ /D__AVX512BF16__")
endif()
if(NCNN_AVX512FP16)
ncnn_add_arch_opt_source(${class} avx512fp16 "/arch:AVX512 -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mfma -mf16c -mavx512fp16 /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__ /D__AVX512FP16__")
endif()
if(NCNN_AVXVNNI)
ncnn_add_arch_opt_source(${class} avxvnni "/arch:AVX2 -mfma -mf16c -mavxvnni /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__ /D__AVXVNNI__")
endif()
if(NCNN_AVX2)
ncnn_add_arch_opt_source(${class} avx2 "/arch:AVX2 -mfma -mf16c /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__")
endif()
if(NCNN_XOP)
ncnn_add_arch_opt_source(${class} xop "/arch:AVX -mxop /D__SSSE3__ /D__SSE4_1__ /D__XOP__")
endif()
if(NCNN_F16C)
ncnn_add_arch_opt_source(${class} f16c "/arch:AVX -mf16c /D__SSSE3__ /D__SSE4_1__ /D__F16C__")
endif()
else()
if(NCNN_RUNTIME_CPU AND NCNN_AVX512)
ncnn_add_arch_opt_layer(${class} avx512 "-mavx512f -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mfma -mf16c")
Expand Down Expand Up @@ -216,7 +247,7 @@ macro(ncnn_add_layer class)
endif()

if(NCNN_TARGET_ARCH STREQUAL "arm" AND (CMAKE_SIZEOF_VOID_P EQUAL 8 OR NCNN_TARGET_ILP32))
if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC" OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC"))
if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
if(NCNN_VFPV4)
ncnn_add_arch_opt_source(${class} vfpv4 " ")
endif()
Expand Down Expand Up @@ -246,6 +277,36 @@ macro(ncnn_add_layer class)
endif()
if(NCNN_ARM86SVEF32MM)
endif()
elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC")
if(NCNN_VFPV4)
ncnn_add_arch_opt_source(${class} vfpv4 " ")
endif()
if(NCNN_ARM82)
ncnn_add_arch_opt_source(${class} asimdhp "/arch:armv8.2 -march=armv8.2-a+fp16 /D__ARM_FEATURE_FP16_VECTOR_ARITHMETIC")
endif()
if(NCNN_ARM82DOT)
ncnn_add_arch_opt_source(${class} asimddp "/arch:armv8.2 -march=armv8.2-a+fp16+dotprod /D__ARM_FEATURE_FP16_VECTOR_ARITHMETIC /D__ARM_FEATURE_DOTPROD")
endif()
if(NCNN_ARM82FP16FML)
ncnn_add_arch_opt_source(${class} asimdfhm "/arch:armv8.2 -march=armv8.2-a+fp16+fp16fml /D__ARM_FEATURE_FP16_VECTOR_ARITHMETIC /D__ARM_FEATURE_FP16_FML")
endif()
if(NCNN_ARM84BF16)
ncnn_add_arch_opt_source(${class} bf16 "/arch:armv8.4 -march=armv8.4-a+fp16+dotprod+bf16 /D__ARM_FEATURE_FP16_VECTOR_ARITHMETIC /D__ARM_FEATURE_DOTPROD /D__ARM_FEATURE_FP16_FML /D__ARM_FEATURE_BF16_VECTOR_ARITHMETIC")
endif()
if(NCNN_ARM84I8MM)
ncnn_add_arch_opt_source(${class} i8mm "/arch:armv8.4 -march=armv8.4-a+fp16+dotprod+i8mm /D__ARM_FEATURE_FP16_VECTOR_ARITHMETIC /D__ARM_FEATURE_DOTPROD /D__ARM_FEATURE_FP16_FML /D__ARM_FEATURE_MATMUL_INT8")
endif()
# TODO add support for sve family
if(NCNN_ARM86SVE)
endif()
if(NCNN_ARM86SVE2)
endif()
if(NCNN_ARM86SVEBF16)
endif()
if(NCNN_ARM86SVEI8MM)
endif()
if(NCNN_ARM86SVEF32MM)
endif()
else()
if(NCNN_VFPV4)
ncnn_add_arch_opt_source(${class} vfpv4 " ")
Expand Down
Loading
Loading