Guard against the Open MPI 4.x singleton-mode failure on some NUMA CPUs

On affected systems (e.g. AMD Ryzen or AMD EPYC), Open MPI 4.x halts with a
fatal error when setting the processor affinity in MPI_Init if the program
runs in singleton mode with the MCA binding policy "numa".

Changes in this patch:
* CMakeLists.txt: add the option ESPRESSO_ADD_OMPI_SINGLETON_WARNING; when the
  MPI backend is Open MPI 4.x, read the CPU model name from /proc/cpuinfo on
  Linux, cache it in ESPRESSO_CPU_MODEL_NAME, and set
  ESPRESSO_MPIEXEC_GUARD_SINGLETON_NUMA to ON when it starts with
  "AMD (EPYC|Ryzen)".
* cmake/unit_test.cmake: pass the sanitizer options to the test as a list of
  environment variables; additionally set
  OMPI_MCA_hwloc_base_binding_policy=none for tests without NUM_PROC that
  depend on Boost::mpi or MPI::MPI_CXX when the guard is ON.
* doc/sphinx: document the issue, its workaround and the new option.
* src/python/pypresso.cmakein: print a warning when the guard and the option
  are ON, OMPI_COMM_WORLD_SIZE is empty, the binding policy is "numa" and
  /proc/cpuinfo reports a matching CPU model.
* src/script_interface/tests/CMakeLists.txt: add Boost::mpi to the DEPENDS of
  ObjectList_test and ObjectMap_test.

NOTE(review): line breaks and indentation were reconstructed from a
whitespace-collapsed copy of this patch; check the context lines against the
target tree before applying.

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6a2a70d3548..f1b20bedbd7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -96,6 +96,10 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND NOT APPLE)
     "Build with memory sanitizer (experimental; requires a memory-sanitized Python interpreter)"
     OFF)
 endif()
+option(
+  ESPRESSO_ADD_OMPI_SINGLETON_WARNING
+  "Add a runtime warning in the pypresso script for NUMA architectures that aren't supported in singleton mode by Open MPI 4.x"
+  ON)
 option(WARNINGS_ARE_ERRORS "Treat warnings as errors during compilation" OFF)
 option(WITH_CCACHE "Use ccache compiler invocation." OFF)
 option(WITH_PROFILER "Enable profiler annotations." OFF)
@@ -280,11 +284,43 @@ find_package(MPI 3.0 REQUIRED)
 find_package(MpiexecBackend)
 
 # OpenMPI checks the number of processes against the number of CPUs
-if("${MPIEXEC_BACKEND_NAME}" STREQUAL "OpenMPI" AND "${MPIEXEC_BACKEND_VERSION}"
-                                                     VERSION_GREATER_EQUAL 2.0.0)
-  set(MPIEXEC_OVERSUBSCRIBE "-oversubscribe")
-else()
-  set(MPIEXEC_OVERSUBSCRIBE "")
+set(MPIEXEC_OVERSUBSCRIBE "")
+# Open MPI 4.x has a bug on NUMA archs that prevents running in singleton mode
+set(ESPRESSO_MPIEXEC_GUARD_SINGLETON_NUMA OFF)
+set(ESPRESSO_CPU_MODEL_NAME_OMPI_SINGLETON_NUMA_PATTERN "AMD (EPYC|Ryzen)")
+
+if("${MPIEXEC_BACKEND_NAME}" STREQUAL "OpenMPI")
+  if("${MPIEXEC_BACKEND_VERSION}" VERSION_GREATER_EQUAL 2.0.0)
+    set(MPIEXEC_OVERSUBSCRIBE "-oversubscribe")
+  endif()
+  if("${MPIEXEC_BACKEND_VERSION}" VERSION_GREATER_EQUAL 4.0
+     AND "${MPIEXEC_BACKEND_VERSION}" VERSION_LESS 5.0)
+    if(NOT DEFINED ESPRESSO_CPU_MODEL_NAME)
+      if("${CMAKE_SYSTEM_NAME}" STREQUAL "Linux")
+        if(EXISTS /proc/cpuinfo)
+          file(READ /proc/cpuinfo ESPRESSO_CPU_INFO)
+          # use if(MATCHES) so that a cpuinfo file without a "model name"
+          # field yields a placeholder instead of the whole file content
+          if(ESPRESSO_CPU_INFO MATCHES
+             "(^|\n)[Mm]odel name[ \t]*:[ \t]+([^\n]+)")
+            set(ESPRESSO_CPU_MODEL_NAME_STRING "${CMAKE_MATCH_2}")
+          else()
+            set(ESPRESSO_CPU_MODEL_NAME_STRING "__unknown")
+          endif()
+        else()
+          set(ESPRESSO_CPU_MODEL_NAME_STRING "__unreadable")
+        endif()
+      else()
+        set(ESPRESSO_CPU_MODEL_NAME_STRING "__unaffected")
+      endif()
+      set(ESPRESSO_CPU_MODEL_NAME "${ESPRESSO_CPU_MODEL_NAME_STRING}"
+          CACHE INTERNAL "")
+    endif()
+    if(ESPRESSO_CPU_MODEL_NAME MATCHES
+       "^${ESPRESSO_CPU_MODEL_NAME_OMPI_SINGLETON_NUMA_PATTERN}")
+      set(ESPRESSO_MPIEXEC_GUARD_SINGLETON_NUMA ON)
+    endif()
+  endif()
 endif()
 
 # OpenMPI cannot run two jobs in parallel in a Docker container, because the
diff --git a/cmake/unit_test.cmake b/cmake/unit_test.cmake
index 3e08b23e346..0a89c65fd14 100644
--- a/cmake/unit_test.cmake
+++ b/cmake/unit_test.cmake
@@ -30,12 +30,14 @@ function(UNIT_TEST)
   else()
     set(SANITIZERS_HALT_ON_ERROR "halt_on_error=0")
   endif()
-  set(UBSAN_OPTIONS "UBSAN_OPTIONS=suppressions=${CMAKE_SOURCE_DIR}/maintainer/CI/ubsan.supp:${SANITIZERS_HALT_ON_ERROR}:print_stacktrace=1")
-  set(ASAN_OPTIONS "ASAN_OPTIONS=${SANITIZERS_HALT_ON_ERROR}:detect_leaks=0:allocator_may_return_null=1")
-  set(MSAN_OPTIONS "MSAN_OPTIONS=${SANITIZERS_HALT_ON_ERROR}")
+  list(APPEND TEST_ENV_VARIABLES "UBSAN_OPTIONS=suppressions=${CMAKE_SOURCE_DIR}/maintainer/CI/ubsan.supp:${SANITIZERS_HALT_ON_ERROR}:print_stacktrace=1")
+  list(APPEND TEST_ENV_VARIABLES "ASAN_OPTIONS=${SANITIZERS_HALT_ON_ERROR}:detect_leaks=0:allocator_may_return_null=1")
+  list(APPEND TEST_ENV_VARIABLES "MSAN_OPTIONS=${SANITIZERS_HALT_ON_ERROR}")
+  if(NOT TEST_NUM_PROC AND ESPRESSO_MPIEXEC_GUARD_SINGLETON_NUMA AND "${TEST_DEPENDS}" MATCHES "(^|;)([Bb]oost::mpi|MPI::MPI_CXX)($|;)")
+    list(APPEND TEST_ENV_VARIABLES "OMPI_MCA_hwloc_base_binding_policy=none")
+  endif()
   set_tests_properties(
-    ${TEST_NAME} PROPERTIES ENVIRONMENT
-                            "${UBSAN_OPTIONS} ${ASAN_OPTIONS} ${MSAN_OPTIONS}")
+    ${TEST_NAME} PROPERTIES ENVIRONMENT "${TEST_ENV_VARIABLES}")
   add_dependencies(check_unit_tests ${TEST_NAME})
 endfunction(UNIT_TEST)
 
diff --git a/doc/sphinx/installation.rst b/doc/sphinx/installation.rst
index 18f3d82a796..8962db6d869 100644
--- a/doc/sphinx/installation.rst
+++ b/doc/sphinx/installation.rst
@@ -61,6 +61,15 @@ are required to be able to compile and use |es|:
         Other MPI implementations like Intel MPI should also work, although
         they are not actively tested in |es| continuous integration.
 
+        Open MPI version 4.x is known to not properly support the MCA binding
+        policy "numa" in singleton mode on a few NUMA architectures.
+        On affected systems, e.g. AMD Ryzen or AMD EPYC, Open MPI halts with
+        a fatal error when setting the processor affinity in ``MPI_Init``.
+        This issue can be resolved by setting the environment variable
+        ``OMPI_MCA_hwloc_base_binding_policy`` to a value other than "numa",
+        such as "l3cache" to bind to a NUMA shared memory block, or to
+        "none" to disable binding (can cause performance loss).
+
     Python
         |es|'s main user interface relies on Python 3.
 
@@ -720,6 +729,11 @@ The following options are available:
 * ``WITH_VALGRIND_INSTRUMENTATION``: Build with valgrind instrumentation
   markers
 
+* ``ESPRESSO_ADD_OMPI_SINGLETON_WARNING``: Add a runtime warning in the
+  pypresso and ipypresso scripts that is triggered in singleton mode
+  with Open MPI version 4.x on unsupported NUMA environments
+  (see :term:`MPI installation requirements <MPI>` for details).
+
 When the value in the :file:`CMakeLists.txt` file is set to ON, the
 corresponding option is created; if the value of the option is set to OFF, the
 corresponding option is not created. These options can also be modified
diff --git a/doc/sphinx/running.rst b/doc/sphinx/running.rst
index e377269030f..bc054e490f8 100644
--- a/doc/sphinx/running.rst
+++ b/doc/sphinx/running.rst
@@ -245,8 +245,7 @@ Parallel computing
 
 Many algorithms in |es| are designed to work with multiple MPI ranks.
 However, not all algorithms benefit from MPI parallelization equally.
-Several algorithms only use MPI rank 0 (e.g. :ref:`Reaction methods`), while
-a small subset simply don't support MPI (e.g. :ref:`Dipolar direct sum`).
+Several algorithms only use MPI rank 0 (e.g. :ref:`Reaction methods`).
 |es| should work with most MPI implementations on the market; see the
 :term:`MPI installation requirements <MPI>` for details.
 
diff --git a/src/python/pypresso.cmakein b/src/python/pypresso.cmakein
index aab95c89e8f..e05ac268f9d 100755
--- a/src/python/pypresso.cmakein
+++ b/src/python/pypresso.cmakein
@@ -14,6 +14,15 @@ else
 fi
 export PYTHONPATH
 
+# Open MPI 4.x cannot run in singleton mode on some NUMA systems
+if [ "@ESPRESSO_ADD_OMPI_SINGLETON_WARNING@" = "ON" ] && [ "@ESPRESSO_MPIEXEC_GUARD_SINGLETON_NUMA@" = "ON" ]; then
+  if [ -z "${OMPI_COMM_WORLD_SIZE}" ] && [ "${OMPI_MCA_hwloc_base_binding_policy}" = "numa" ]; then
+    if test -f /proc/cpuinfo && grep -q -E "^[Mm]odel name[[:space:]]*:[[:space:]]+@ESPRESSO_CPU_MODEL_NAME_OMPI_SINGLETON_NUMA_PATTERN@( |$)" /proc/cpuinfo; then
+      echo "warning: if Open MPI fails to set processor affinity, set environment variable OMPI_MCA_hwloc_base_binding_policy to \"none\" or \"l3cache\""
+    fi
+  fi
+fi
+
 if [ "@CMAKE_CXX_COMPILER_ID@" != "GNU" ] && [ "@WITH_ASAN@" = "ON" ]; then
   asan_lib=$("@CMAKE_CXX_COMPILER@" /dev/null -### -o /dev/null -fsanitize=address 2>&1 | grep -o '[" ][^" ]*libclang_rt.asan[^" ]*[^s][" ]' | sed 's/[" ]//g' | sed 's/\.a$/.so/g')
   export DYLD_INSERT_LIBRARIES="$asan_lib"
diff --git a/src/script_interface/tests/CMakeLists.txt b/src/script_interface/tests/CMakeLists.txt
index dc9e3368a0f..c78015a7b43 100644
--- a/src/script_interface/tests/CMakeLists.txt
+++ b/src/script_interface/tests/CMakeLists.txt
@@ -42,9 +42,9 @@ unit_test(NAME ParallelExceptionHandler_test SRC
 unit_test(NAME packed_variant_test SRC packed_variant_test.cpp DEPENDS
           Espresso::script_interface)
 unit_test(NAME ObjectList_test SRC ObjectList_test.cpp DEPENDS
-          Espresso::script_interface Espresso::core)
+          Espresso::script_interface Espresso::core Boost::mpi)
 unit_test(NAME ObjectMap_test SRC ObjectMap_test.cpp DEPENDS
-          Espresso::script_interface Espresso::core)
+          Espresso::script_interface Espresso::core Boost::mpi)
 unit_test(NAME serialization_mpi_guard_test SRC
           serialization_mpi_guard_test.cpp DEPENDS Espresso::script_interface
           Boost::mpi MPI::MPI_CXX NUM_PROC 2)