From 853c483f762539adf87a81a6f14977aba3e48260 Mon Sep 17 00:00:00 2001
From: "Liu, Yucheng"
Date: Fri, 20 Oct 2023 11:04:16 +0800
Subject: [PATCH 1/4] remove MSVC warning

---
 .../llm/runtime/graph/vectors/cpu/vec_load.hpp  | 4 ++--
 .../llm/runtime/graph/vectors/cpu/vec_store.hpp | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/intel_extension_for_transformers/llm/runtime/graph/vectors/cpu/vec_load.hpp b/intel_extension_for_transformers/llm/runtime/graph/vectors/cpu/vec_load.hpp
index 435736eee87..3211ceac4e2 100644
--- a/intel_extension_for_transformers/llm/runtime/graph/vectors/cpu/vec_load.hpp
+++ b/intel_extension_for_transformers/llm/runtime/graph/vectors/cpu/vec_load.hpp
@@ -26,7 +26,7 @@ inline fp32x16 load_fp32x16(void const* mem_addr) {
 #endif
 }
 template <>
-fp32x16 load_kernel_t(const void* src) {
+inline fp32x16 load_kernel_t(const void* src) {
   return load_fp32x16(src);
 }
 inline fp32x16 mask_load_fp32x16(fp32x16 src, int mask, void const* mem_addr) {
@@ -43,7 +43,7 @@ inline bf16x16 load_bf16x16(void const* mem_addr) {
   return {_mm256_loadu_si256(mem_addr_bf16)};
 }
 template <>
-bf16x16 load_kernel_t(const void* src) {
+inline bf16x16 load_kernel_t(const void* src) {
   return load_bf16x16(src);
 }
 
diff --git a/intel_extension_for_transformers/llm/runtime/graph/vectors/cpu/vec_store.hpp b/intel_extension_for_transformers/llm/runtime/graph/vectors/cpu/vec_store.hpp
index e096a0de96b..5522ebb2f81 100644
--- a/intel_extension_for_transformers/llm/runtime/graph/vectors/cpu/vec_store.hpp
+++ b/intel_extension_for_transformers/llm/runtime/graph/vectors/cpu/vec_store.hpp
@@ -20,7 +20,7 @@
 inline void store_s8x16(void* mem_addr, s8x16 a) { _mm_storeu_si128(reinterpret_cast<__m128i*>(mem_addr), a.first); }
 inline void store_u8x16(void* mem_addr, u8x16 a) { _mm_storeu_si128(reinterpret_cast<__m128i*>(mem_addr), a.first); }
 template <>
-void store_kernel_t(void* dst, s8x16 src) {
+inline void store_kernel_t(void* dst, s8x16 src) {
   store_s8x16(dst, src);
 }
 
@@ -57,7 +57,7 @@ inline void store_fp32x16(void* mem_addr, fp32x16 a) {
 }
 
 template <>
-void store_kernel_t(void* dst, fp32x16 src) {
+inline void store_kernel_t(void* dst, fp32x16 src) {
   store_fp32x16(dst, src);
 }
 
@@ -66,7 +66,7 @@ inline void store_bf16x16(void* mem_addr, bf16x16 a) {
 }
 
 template <>
-void store_kernel_t(void* dst, bf16x16 src) {
+inline void store_kernel_t(void* dst, bf16x16 src) {
   store_bf16x16(dst, src);
 }
 

From 7a4084d06a00e8e3a70bc29c00686598d1921141 Mon Sep 17 00:00:00 2001
From: "Liu, Yucheng"
Date: Fri, 20 Oct 2023 11:40:20 +0800
Subject: [PATCH 2/4] auto detect AVX512

---
 .../llm/runtime/graph/CMakeLists.txt | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/intel_extension_for_transformers/llm/runtime/graph/CMakeLists.txt b/intel_extension_for_transformers/llm/runtime/graph/CMakeLists.txt
index 666d9845abd..712aaa78ccf 100644
--- a/intel_extension_for_transformers/llm/runtime/graph/CMakeLists.txt
+++ b/intel_extension_for_transformers/llm/runtime/graph/CMakeLists.txt
@@ -54,9 +54,9 @@ option(NE_SANITIZE_UNDEFINED "neural_engine: enable undefined sanitizer"
 # instruction set specific
 option(NE_AVX "neural_engine: enable AVX" ON)
 option(NE_AVX2 "neural_engine: enable AVX2" ON)
-option(NE_AVX512 "neural_engine: enable AVX512" ON)
-option(NE_AVX512_VBMI "neural_engine: enable AVX512-VBMI" ON)
-option(NE_AVX512_VNNI "neural_engine: enable AVX512-VNNI" ON)
+option(NE_AVX512 "neural_engine: enable AVX512" OFF)
+option(NE_AVX512_VBMI "neural_engine: enable AVX512-VBMI" OFF)
+option(NE_AVX512_VNNI "neural_engine: enable AVX512-VNNI" OFF)
 option(NE_FMA "neural_engine: enable FMA" ON)
 option(NE_AMX "neural_engine: enable AMX" OFF)
 
@@ -65,6 +65,13 @@ if (NOT MSVC)
     option(NE_F16C "neural_engine: enable F16C" ON)
 endif()
 
+execute_process(COMMAND lscpu OUTPUT_VARIABLE CPUINFO)
+if (CPUINFO MATCHES "avx512")
+    set(NE_AVX512 ON)
+    set(NE_AVX512_VBMI ON)
+    set(NE_AVX512_VNNI ON)
+endif()
+
 # 3rd party libs
 option(NE_ONEDNN "neural_engine: use oneDNN" ON)
 option(NE_LIBXSMM "neural_engine: use libxsmm" OFF)

From 24933d3ee2925c84490ef8c8814ebc9a72c854e8 Mon Sep 17 00:00:00 2001
From: "Liu, Yucheng"
Date: Fri, 20 Oct 2023 17:07:27 +0800
Subject: [PATCH 3/4] default to set AVX512 OFF

---
 .../llm/runtime/graph/CMakeLists.txt | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/intel_extension_for_transformers/llm/runtime/graph/CMakeLists.txt b/intel_extension_for_transformers/llm/runtime/graph/CMakeLists.txt
index 712aaa78ccf..80d4a800273 100644
--- a/intel_extension_for_transformers/llm/runtime/graph/CMakeLists.txt
+++ b/intel_extension_for_transformers/llm/runtime/graph/CMakeLists.txt
@@ -65,13 +65,6 @@ if (NOT MSVC)
     option(NE_F16C "neural_engine: enable F16C" ON)
 endif()
 
-execute_process(COMMAND lscpu OUTPUT_VARIABLE CPUINFO)
-if (CPUINFO MATCHES "avx512")
-    set(NE_AVX512 ON)
-    set(NE_AVX512_VBMI ON)
-    set(NE_AVX512_VNNI ON)
-endif()
-
 # 3rd party libs
 option(NE_ONEDNN "neural_engine: use oneDNN" ON)
 option(NE_LIBXSMM "neural_engine: use libxsmm" OFF)

From 4e10029959d271b62f24fd5ceb0ca975af24e3b6 Mon Sep 17 00:00:00 2001
From: "Liu, Yucheng"
Date: Fri, 20 Oct 2023 17:08:38 +0800
Subject: [PATCH 4/4] update readme

---
 intel_extension_for_transformers/llm/runtime/graph/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/intel_extension_for_transformers/llm/runtime/graph/README.md b/intel_extension_for_transformers/llm/runtime/graph/README.md
index 1f2c83f0224..4f5ca7839b5 100644
--- a/intel_extension_for_transformers/llm/runtime/graph/README.md
+++ b/intel_extension_for_transformers/llm/runtime/graph/README.md
@@ -61,7 +61,7 @@ cd build
 cmake ..
 cmake --build . -j
 ```
-Note: add compile args ```-DNE_AVX512=OFF -DNE_AVX512_VBMI=OFF -DNE_AVX512_VNNI=OFF``` to ```cmake``` when compiling it on a CPU without AVX512
+
 ### 2. Run LLM with Python API
 You can use Python API to run Hugging Face model simply. Here is the sample code:
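
The configure-time probe added in PATCH 2 shells out to `lscpu`, which is generally only present on Linux; PATCH 3 drops it again, so the AVX512 options stay OFF unless passed explicitly (e.g. `-DNE_AVX512=ON`). For reference only, the sketch below shows a compiler-based way to express the same auto-detection without `lscpu`. It is not part of this patch series: the `NE_AVX512*` names come from the CMakeLists.txt touched above, while `HOST_HAS_AVX512F` and the `__builtin_cpu_supports` check are assumptions of the sketch (GCC/Clang only; MSVC would need a `__cpuidex`-based test), and it only makes sense for native, non-cross builds because the test program runs on the configuring host.

```cmake
# Sketch only, not part of the patches above. Assumes a GCC/Clang toolchain
# (MSVC lacks __builtin_cpu_supports) and a native build; HOST_HAS_AVX512F is
# a name invented here for illustration.
include(CheckCXXSourceRuns)

check_cxx_source_runs("
  int main() { return __builtin_cpu_supports(\"avx512f\") ? 0 : 1; }
" HOST_HAS_AVX512F)

if (HOST_HAS_AVX512F)
  # Mirrors the reverted lscpu block: flip all three NE_AVX512* knobs at once.
  # A stricter probe would also test the avx512vbmi / avx512vnni feature
  # strings individually on compilers that support them.
  set(NE_AVX512      ON)
  set(NE_AVX512_VBMI ON)
  set(NE_AVX512_VNNI ON)
endif()
```

Note that `check_cxx_source_runs()` caches its result in `CMakeCache.txt`, so moving a build directory to a different machine would require deleting the cached entry (or starting a fresh build directory) before the probe re-runs.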