From 56af7769ec208d7b7c7e4eb7e8295e5c14216f17 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Wed, 7 Sep 2022 13:02:27 -0700 Subject: [PATCH] Changes for Cray & Clang * It seems that the new Cray compilers no longer define `_CRAYC`. However, they do define `__cray__`. * For Clang-based Cray compilers, use -O3 instead of -O2 for optimization. * Clang's vectorization pragma is very aggressive. For some codes, it makes ParallelFor with many if statements on CPU much slower than without vectorization. Unfortunately, Clang does not have an ivdep pragma. So we define AMREX_PRAGMA_SIMD as empty for Clang for safety. * It is no longer necessary to use -Wno-pass-failed for Clang-based compilers. --- .github/workflows/hip.yml | 4 ++-- .github/workflows/macos.yml | 4 ++-- Src/Base/AMReX_Extension.H | 4 ++-- Tools/CMake/AMReXFlagsTargets.cmake | 18 +++++++++--------- Tools/CMake/AMReXParallelBackends.cmake | 2 +- Tools/CMake/AMReXSYCL.cmake | 2 +- Tools/GNUMake/comps/armclang.mak | 2 +- Tools/GNUMake/comps/cray.mak | 10 +++++----- Tools/GNUMake/comps/dpcpp.mak | 2 -- Tools/GNUMake/comps/hip.mak | 2 -- Tools/GNUMake/comps/llvm.mak | 2 +- 11 files changed, 24 insertions(+), 28 deletions(-) diff --git a/.github/workflows/hip.yml b/.github/workflows/hip.yml index a128eabf664..0672287437c 100644 --- a/.github/workflows/hip.yml +++ b/.github/workflows/hip.yml @@ -20,7 +20,7 @@ jobs: # ^ # /opt/rocm-4.1.1/hip/include/hip/hcc_detail/hip_runtime.h:176:9: note: macro 'select_impl_' defined here # #define select_impl_(_1, _2, impl_, ...) 
impl_ - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wno-deprecated-declarations -Wno-gnu-zero-variadic-macro-arguments -Wno-pass-failed"} + env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wno-deprecated-declarations -Wno-gnu-zero-variadic-macro-arguments"} steps: - uses: actions/checkout@v2 - name: Dependencies @@ -66,7 +66,7 @@ jobs: # ^ # /opt/rocm-4.1.1/hip/include/hip/hcc_detail/hip_runtime.h:176:9: note: macro 'select_impl_' defined here # #define select_impl_(_1, _2, impl_, ...) impl_ - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wno-deprecated-declarations -Wno-gnu-zero-variadic-macro-arguments -Wno-pass-failed"} + env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wno-deprecated-declarations -Wno-gnu-zero-variadic-macro-arguments"} steps: - uses: actions/checkout@v2 - name: Dependencies diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index e1446a038da..61eb9b9ccdb 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -14,7 +14,7 @@ jobs: env: # build universal binaries for M1 "Apple Silicon" and Intel CPUs CMAKE_OSX_ARCHITECTURES: "arm64;x86_64" - CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wno-c++17-extensions -Wno-range-loop-analysis -Wno-pass-failed" + CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi 
-Wunreachable-code -Wno-c++17-extensions -Wno-range-loop-analysis" # -Wno-range-loop-analysis: Apple clang has a bug in range-loop-analysis steps: - uses: actions/checkout@v2 @@ -39,7 +39,7 @@ jobs: name: AppleClang@11.0 GFortran@9.3 [tests] runs-on: macos-latest env: - CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wno-c++17-extensions -Wno-range-loop-analysis -Wno-pass-failed" + CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wno-c++17-extensions -Wno-range-loop-analysis" # -Wno-range-loop-analysis: Apple clang has a bug in range-loop-analysis steps: - uses: actions/checkout@v2 diff --git a/Src/Base/AMReX_Extension.H b/Src/Base/AMReX_Extension.H index a084777f1a0..753b43995f3 100644 --- a/Src/Base/AMReX_Extension.H +++ b/Src/Base/AMReX_Extension.H @@ -57,7 +57,7 @@ #elif defined(__INTEL_COMPILER) #define AMREX_PRAGMA_SIMD _Pragma("ivdep") -#elif defined(_CRAYC) +#elif defined(_CRAYC) || defined(__cray__) #define AMREX_PRAGMA_SIMD _Pragma("ivdep") #elif defined(__PGI) @@ -73,7 +73,7 @@ #define AMREX_PRAGMA_SIMD _Pragma("ibm independent_loop") #elif defined(__clang__) -#define AMREX_PRAGMA_SIMD _Pragma("clang loop vectorize(enable)") +#define AMREX_PRAGMA_SIMD #elif defined(__GNUC__) #define AMREX_PRAGMA_SIMD _Pragma("GCC ivdep") diff --git a/Tools/CMake/AMReXFlagsTargets.cmake b/Tools/CMake/AMReXFlagsTargets.cmake index 64dcf3f3a5f..2e89c32fddc 100644 --- a/Tools/CMake/AMReXFlagsTargets.cmake +++ b/Tools/CMake/AMReXFlagsTargets.cmake @@ -82,15 +82,15 @@ target_compile_options( Flags_CXX $<${_cxx_cray_dbg}:-O0> $<${_cxx_cray_rwdbg}:> $<${_cxx_cray_rel}:> - $<${_cxx_clang_dbg}:-O0 -Wall -Wextra -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wno-pass-failed> - $<${_cxx_clang_rwdbg}:-Wno-pass-failed> - 
$<${_cxx_clang_rel}:-Wno-pass-failed> - $<${_cxx_appleclang_dbg}:-O0 -Wall -Wextra -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wno-pass-failed> - $<${_cxx_appleclang_rwdbg}:-Wno-pass-failed> - $<${_cxx_appleclang_rel}:-Wno-pass-failed> - $<${_cxx_intelllvm_dbg}:-O0 -Wall -Wextra -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wno-pass-failed> - $<${_cxx_intelllvm_rwdbg}:-Wno-pass-failed> - $<${_cxx_intelllvm_rel}:-Wno-pass-failed> + $<${_cxx_clang_dbg}:-O0 -Wall -Wextra -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable> + $<${_cxx_clang_rwdbg}:> + $<${_cxx_clang_rel}:> + $<${_cxx_appleclang_dbg}:-O0 -Wall -Wextra -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable> + $<${_cxx_appleclang_rwdbg}:> + $<${_cxx_appleclang_rel}:> + $<${_cxx_intelllvm_dbg}:-O0 -Wall -Wextra -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable> + $<${_cxx_intelllvm_rwdbg}:> + $<${_cxx_intelllvm_rel}:> ) # diff --git a/Tools/CMake/AMReXParallelBackends.cmake b/Tools/CMake/AMReXParallelBackends.cmake index ebf397266f8..b249d28ef60 100644 --- a/Tools/CMake/AMReXParallelBackends.cmake +++ b/Tools/CMake/AMReXParallelBackends.cmake @@ -271,7 +271,7 @@ if (AMReX_HIP) # else there will be a runtime issue (cannot find # missing gpu devices) target_compile_options(amrex PUBLIC - $<$<COMPILE_LANGUAGE:CXX>:--amdgpu-target=${AMReX_AMD_ARCH_HIPCC} -Wno-pass-failed>) + $<$<COMPILE_LANGUAGE:CXX>:--amdgpu-target=${AMReX_AMD_ARCH_HIPCC}>) endif() target_compile_options(amrex PUBLIC $<$<COMPILE_LANGUAGE:CXX>:-m64>) diff --git a/Tools/CMake/AMReXSYCL.cmake b/Tools/CMake/AMReXSYCL.cmake index 8e6c7f2f4d5..007b5f321fe 100644 --- a/Tools/CMake/AMReXSYCL.cmake +++ b/Tools/CMake/AMReXSYCL.cmake @@ -45,7 +45,7 @@ target_compile_features(SYCL INTERFACE cxx_std_17) # target_compile_options( SYCL INTERFACE - $<${_cxx_dpcpp}:-Wno-error=sycl-strict -Wno-pass-failed -fsycl> + $<${_cxx_dpcpp}:-Wno-error=sycl-strict -fsycl> $<${_cxx_dpcpp}:$<$<BOOL:${AMReX_DPCPP_SPLIT_KERNEL}>:-fsycl-device-code-split=per_kernel>>) # temporary work-around for DPC++ beta08 bug 
diff --git a/Tools/GNUMake/comps/armclang.mak b/Tools/GNUMake/comps/armclang.mak index efe4a718106..ccbfbeb77ed 100644 --- a/Tools/GNUMake/comps/armclang.mak +++ b/Tools/GNUMake/comps/armclang.mak @@ -57,7 +57,7 @@ ifeq ($(WARN_ERROR),TRUE) endif # disable some warnings -CXXFLAGS += -Wno-pass-failed -Wno-c++17-extensions +CXXFLAGS += -Wno-c++17-extensions ######################################################################## diff --git a/Tools/GNUMake/comps/cray.mak b/Tools/GNUMake/comps/cray.mak index 85a1133e412..f75a56c5f75 100644 --- a/Tools/GNUMake/comps/cray.mak +++ b/Tools/GNUMake/comps/cray.mak @@ -53,10 +53,10 @@ else # CCE <= 8. So we adjust some flags to achieve similar optimization. See # this page: # http://pubs.cray.com/content/S-5212/9.0/cray-compiling-environment-cce-release-overview/cce-900-software-enhancements - CXXFLAGS += -O2 -ffast-math #-fsave-loopmark -fsave-decompile - CFLAGS += -O2 -ffast-math #-fsave-loopmark -fsave-decompile - FFLAGS += -O2 -h list=a - F90FLAGS += -O2 -h list=a + CXXFLAGS += -O3 -ffast-math #-fsave-loopmark -fsave-decompile + CFLAGS += -O3 -ffast-math #-fsave-loopmark -fsave-decompile + FFLAGS += -O3 -h list=a + F90FLAGS += -O3 -h list=a else GENERIC_COMP_FLAGS += -h list=a @@ -120,7 +120,7 @@ else endif ifeq ($(CRAY_IS_CLANG_BASED),TRUE) - CXXFLAGS += -Wno-pass-failed -Wno-c++17-extensions + CXXFLAGS += -Wno-c++17-extensions endif CXXFLAGS += $(GENERIC_COMP_FLAGS) diff --git a/Tools/GNUMake/comps/dpcpp.mak b/Tools/GNUMake/comps/dpcpp.mak index d2f7f72108e..b351f0ac731 100644 --- a/Tools/GNUMake/comps/dpcpp.mak +++ b/Tools/GNUMake/comps/dpcpp.mak @@ -36,8 +36,6 @@ else endif -CXXFLAGS += -Wno-pass-failed # disable this warning - ifeq ($(WARN_ALL),TRUE) warning_flags = -Wall -Wextra -Wno-sign-compare -Wunreachable-code -Wnull-dereference warning_flags += -Wfloat-conversion -Wextra-semi diff --git a/Tools/GNUMake/comps/hip.mak b/Tools/GNUMake/comps/hip.mak index d94f8f3c66f..d86f887e087 100644 --- 
a/Tools/GNUMake/comps/hip.mak +++ b/Tools/GNUMake/comps/hip.mak @@ -86,8 +86,6 @@ ifeq ($(HIP_COMPILER),clang) endif - CXXFLAGS += -Wno-pass-failed # disable this warning - ifeq ($(WARN_ALL),TRUE) warning_flags = -Wall -Wextra -Wunreachable-code -Wnull-dereference warning_flags += -Wfloat-conversion -Wextra-semi diff --git a/Tools/GNUMake/comps/llvm.mak b/Tools/GNUMake/comps/llvm.mak index 2bf710c0d94..ad516e0799d 100644 --- a/Tools/GNUMake/comps/llvm.mak +++ b/Tools/GNUMake/comps/llvm.mak @@ -60,7 +60,7 @@ ifeq ($(WARN_ERROR),TRUE) endif # disable some warnings -CXXFLAGS += -Wno-pass-failed -Wno-c++17-extensions +CXXFLAGS += -Wno-c++17-extensions ########################################################################