From 79c0e9d56ded4cfe9c10b21a5ef0ebe874b070dd Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Thu, 11 Feb 2021 17:51:01 +0100 Subject: [PATCH 1/5] msvc doesn't set __F16C__ --- faiss/impl/ScalarQuantizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/faiss/impl/ScalarQuantizer.cpp b/faiss/impl/ScalarQuantizer.cpp index 6a6ca3b5a2..5af915b210 100644 --- a/faiss/impl/ScalarQuantizer.cpp +++ b/faiss/impl/ScalarQuantizer.cpp @@ -40,7 +40,7 @@ namespace faiss { ********************************************************************/ #ifdef __AVX2__ -#ifdef __F16C__ +#if defined(__F16C__) || defined(_MSC_VER) #define USE_F16C #else #warning "Cannot enable AVX optimizations in scalar quantizer if -mf16c is not set as well" From bb9064601e8e3abc37f6ece9c9f0580632af94ae Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Fri, 12 Feb 2021 01:19:15 +0100 Subject: [PATCH 2/5] add windows compat for __SSEx__ macros --- faiss/impl/ScalarQuantizer.cpp | 1 + faiss/impl/platform_macros.h | 10 ++++++++++ faiss/utils/distances_simd.cpp | 1 + 3 files changed, 12 insertions(+) diff --git a/faiss/impl/ScalarQuantizer.cpp b/faiss/impl/ScalarQuantizer.cpp index 5af915b210..32f22ae822 100644 --- a/faiss/impl/ScalarQuantizer.cpp +++ b/faiss/impl/ScalarQuantizer.cpp @@ -13,6 +13,7 @@ #include #include +#include #ifdef __SSE__ #include diff --git a/faiss/impl/platform_macros.h b/faiss/impl/platform_macros.h index e9910e6356..66391131bb 100644 --- a/faiss/impl/platform_macros.h +++ b/faiss/impl/platform_macros.h @@ -51,6 +51,16 @@ inline int __builtin_clzll(uint64_t x) { #define __builtin_popcountl __popcnt64 +// VS does not define __SSEx__, and _M_IX86_FP is only defined on 32-bit processors +// cf. 
https://docs.microsoft.com/en-us/cpp/preprocessor/predefined-macros +#ifdef __AVX__ +#define __SSE__ 1 +#define __SSE2__ 1 +#define __SSE3__ 1 +#define __SSE4_1__ 1 +#define __SSE4_2__ 1 +#endif + #else /******************************************************* * Linux and OSX diff --git a/faiss/utils/distances_simd.cpp b/faiss/utils/distances_simd.cpp index 21bbb5c01d..e339c509f9 100644 --- a/faiss/utils/distances_simd.cpp +++ b/faiss/utils/distances_simd.cpp @@ -16,6 +16,7 @@ #include #include +#include #ifdef __SSE3__ #include From 5a676597c55f5ad027d08ee737d799714b2f1712 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Fri, 12 Feb 2021 10:27:41 +0100 Subject: [PATCH 3/5] add alias for int-version of __builtin_popcount Thankfully, __builtin_popcount / __builtin_popcountl are very clearly distinguishable visually. --- faiss/impl/platform_macros.h | 1 + 1 file changed, 1 insertion(+) diff --git a/faiss/impl/platform_macros.h b/faiss/impl/platform_macros.h index 66391131bb..b8ff03a952 100644 --- a/faiss/impl/platform_macros.h +++ b/faiss/impl/platform_macros.h @@ -49,6 +49,7 @@ inline int __builtin_clzll(uint64_t x) { return (int)__lzcnt64(x); } +#define __builtin_popcount __popcnt #define __builtin_popcountl __popcnt64 // VS does not define __SSEx__, and _M_IX86_FP is only defined on 32-bit processors From 4123c02ccb71734149fd0b5949010f2bcbdb1852 Mon Sep 17 00:00:00 2001 From: "H. 
Vetinari" Date: Fri, 12 Feb 2021 10:51:55 +0100 Subject: [PATCH 4/5] convert non-portable __aligned__ to existing compat-macro ALIGNED --- faiss/utils/distances_simd.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/faiss/utils/distances_simd.cpp b/faiss/utils/distances_simd.cpp index e339c509f9..06fc9203c7 100644 --- a/faiss/utils/distances_simd.cpp +++ b/faiss/utils/distances_simd.cpp @@ -166,7 +166,7 @@ void fvec_inner_products_ny_ref (float * ip, static inline __m128 masked_read (int d, const float *x) { assert (0 <= d && d < 4); - __attribute__((__aligned__(16))) float buf[4] = {0, 0, 0, 0}; + ALIGNED(16) float buf[4] = {0, 0, 0, 0}; switch (d) { case 3: buf[2] = x[2]; @@ -988,7 +988,7 @@ void compute_PQ_dis_tables_dsub2( simd8float32 centroids[8]; for (int k = 0; k < 8; k++) { - float centroid[8] __attribute__((aligned(32))); + ALIGNED(32) float centroid[8]; size_t wp = 0; size_t rp = (m0 * ksub + k + k0) * 2; for (int m = m0; m < m1; m++) { From 9678b195be64bf3eb3223c6f635c5e9e9c89084c Mon Sep 17 00:00:00 2001 From: "H. 
Vetinari" Date: Fri, 12 Feb 2021 20:24:35 +0100 Subject: [PATCH 5/5] move F16C-vs-MSVC topic to platform_macros.h --- faiss/impl/ScalarQuantizer.cpp | 2 +- faiss/impl/platform_macros.h | 10 +++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/faiss/impl/ScalarQuantizer.cpp b/faiss/impl/ScalarQuantizer.cpp index 32f22ae822..f9e793c68e 100644 --- a/faiss/impl/ScalarQuantizer.cpp +++ b/faiss/impl/ScalarQuantizer.cpp @@ -41,7 +41,7 @@ namespace faiss { ********************************************************************/ #ifdef __AVX2__ -#if defined(__F16C__) || defined(_MSC_VER) +#ifdef __F16C__ #define USE_F16C #else #warning "Cannot enable AVX optimizations in scalar quantizer if -mf16c is not set as well" diff --git a/faiss/impl/platform_macros.h b/faiss/impl/platform_macros.h index b8ff03a952..1f4795c65c 100644 --- a/faiss/impl/platform_macros.h +++ b/faiss/impl/platform_macros.h @@ -52,7 +52,7 @@ inline int __builtin_clzll(uint64_t x) { #define __builtin_popcount __popcnt #define __builtin_popcountl __popcnt64 -// VS does not define __SSEx__, and _M_IX86_FP is only defined on 32-bit processors +// MSVC does not define __SSEx__, and _M_IX86_FP is only defined on 32-bit processors // cf. https://docs.microsoft.com/en-us/cpp/preprocessor/predefined-macros #ifdef __AVX__ #define __SSE__ 1 @@ -62,6 +62,14 @@ inline int __builtin_clzll(uint64_t x) { #define __SSE4_2__ 1 #endif +// MSVC sets FMA and F16C automatically when using AVX2 +// Ref. FMA (under /arch:AVX2): https://docs.microsoft.com/en-us/cpp/build/reference/arch-x64 +// Ref. F16C (2nd paragraph): https://walbourn.github.io/directxmath-avx2/ +#ifdef __AVX2__ +#define __FMA__ 1 +#define __F16C__ 1 +#endif + #else /******************************************************* * Linux and OSX