diff --git a/cmake/VWFlags.cmake b/cmake/VWFlags.cmake
index 8a46c09197a..7c157df7516 100644
--- a/cmake/VWFlags.cmake
+++ b/cmake/VWFlags.cmake
@@ -16,6 +16,15 @@ if("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64")
   if(NOT "arm64" STREQUAL "${CMAKE_OSX_ARCHITECTURES}")
     # Use sse2 by default. Change to latest simd extensions such as avx512 on supported architecture.
     set(LINUX_X86_64_OPT_FLAGS -msse2 -mfpmath=sse)
+    if(UNIX AND NOT APPLE)
+      # -mavx2 -mfma allow the compiler to emit AVX2/FMA instructions, which fault on
+      # CPUs that lack them. Defaults to ON; configure with -DVW_X86_64_AVX2=OFF when
+      # the resulting binaries must also run on older x86_64 hardware.
+      option(VW_X86_64_AVX2 "Compile with -mavx2 -mfma on non-Apple UNIX x86_64" ON)
+      if(VW_X86_64_AVX2)
+        list(APPEND LINUX_X86_64_OPT_FLAGS -mavx2 -mfma)
+      endif()
+    endif()
   endif()
 endif()
 
diff --git a/vowpalwabbit/core/src/reductions/cb/details/large_action_space.h b/vowpalwabbit/core/src/reductions/cb/details/large_action_space.h
index a5d82684317..f421724b185 100644
--- a/vowpalwabbit/core/src/reductions/cb/details/large_action_space.h
+++ b/vowpalwabbit/core/src/reductions/cb/details/large_action_space.h
@@ -12,6 +12,14 @@
 #include "vw/core/v_array.h"
 #include "vw/core/vw_fwd.h"
 
+// Eigen explicit vectorization may not work well with AVX2 when using smaller MAX_ALIGN_BYTES.
+// For more info:
+// https://eigen.tuxfamily.org/dox/TopicPreprocessorDirectives.html#TopicPreprocessorDirectivesPerformance
+// Only supply the default when the build has not already chosen a value (e.g. via -D).
+#ifndef EIGEN_MAX_ALIGN_BYTES
+#  define EIGEN_MAX_ALIGN_BYTES 32
+#endif
+
 #include
 #include
 #include