diff --git a/.github/workflows/clang.yml b/.github/workflows/clang.yml index bdd629ce11f..afd37544c12 100644 --- a/.github/workflows/clang.yml +++ b/.github/workflows/clang.yml @@ -12,13 +12,13 @@ jobs: # Build and install libamrex as AMReX CMake project # Note: this is an intentional "minimal" build that does not enable (many) options library_clang: - name: Clang@6.0 C++14 SP NOMPI Debug [lib] - runs-on: ubuntu-18.04 - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wno-c++17-extensions"} + name: Clang@7.0 C++17 SP NOMPI Debug [lib] + runs-on: ubuntu-20.04 + env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wnon-virtual-dtor"} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies - run: .github/workflows/dependencies/dependencies_clang6.sh + run: .github/workflows/dependencies/dependencies_clang7.sh - name: Build & Install run: | mkdir build @@ -34,7 +34,6 @@ jobs: -DAMReX_PLOTFILE_TOOLS=ON \ -DAMReX_PRECISION=SINGLE \ -DAMReX_PARTICLES_PRECISION=SINGLE \ - -DCMAKE_CXX_STANDARD=14 \ -DCMAKE_C_COMPILER=$(which clang) \ -DCMAKE_CXX_COMPILER=$(which clang++) \ -DCMAKE_Fortran_COMPILER=$(which gfortran) @@ -48,14 +47,14 @@ jobs: ctest --output-on-failure tests_clang: - name: Clang@6.0 C++14 SP Particles DP Mesh Debug [tests] - runs-on: ubuntu-18.04 - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wno-c++17-extensions -O1"} + name: Clang@7.0 C++17 SP Particles DP Mesh Debug [tests] + runs-on: ubuntu-20.04 + env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -O1 -Wnon-virtual-dtor"} # It's too slow with -O0 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies - run: .github/workflows/dependencies/dependencies_clang6.sh + run: .github/workflows/dependencies/dependencies_clang7.sh - name: Build & Install run: | mkdir build @@ -70,7 +69,6 @@ jobs: -DAMReX_PARTICLES=ON \ -DAMReX_PRECISION=DOUBLE \ -DAMReX_PARTICLES_PRECISION=SINGLE \ - -DCMAKE_CXX_STANDARD=14 \ -DCMAKE_C_COMPILER=$(which clang) \ -DCMAKE_CXX_COMPILER=$(which clang++) \ -DCMAKE_Fortran_COMPILER=$(which gfortran) @@ -80,14 +78,14 @@ jobs: # Build 2D libamrex with configure configure-2d: - name: Clang@6.0 NOMPI Release [configure 2D] - runs-on: ubuntu-18.04 + name: Clang@7.0 NOMPI Release [configure 2D] + runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies - run: .github/workflows/dependencies/dependencies_clang6.sh + run: .github/workflows/dependencies/dependencies_clang7.sh - name: Build & Install run: | ./configure --dim 2 --with-fortran no --comp llvm --with-mpi no - make -j2 WARN_ALL=TRUE WARN_ERROR=TRUE XTRA_CXXFLAGS="-fno-operator-names -Wno-c++17-extensions" + make -j2 WARN_ALL=TRUE WARN_ERROR=TRUE XTRA_CXXFLAGS="-fno-operator-names" make install diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml index c5fbceb5d7e..98a2b001760 100644 --- a/.github/workflows/cuda.yml +++ b/.github/workflows/cuda.yml @@ -7,44 +7,13 @@ concurrency: cancel-in-progress: true jobs: - # Build libamrex and all tests with CUDA 10.2 
- tests-cuda10: - name: CUDA@10.2 GNU@6.5.0 C++14 Release [tests] - runs-on: ubuntu-18.04 - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wunreachable-code"} - steps: - - uses: actions/checkout@v2 - - name: Dependencies - run: .github/workflows/dependencies/dependencies_nvcc10.sh - - name: Build & Install - run: | - export PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH} - export LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/local/cuda/lib64:${LD_LIBRARY_PATH} - which nvcc || echo "nvcc not in PATH!" - mkdir build - cd build - cmake .. \ - -DCMAKE_VERBOSE_MAKEFILE=ON \ - -DAMReX_EB=OFF \ - -DAMReX_ENABLE_TESTS=ON \ - -DAMReX_FORTRAN=OFF \ - -DAMReX_PARTICLES=ON \ - -DAMReX_GPU_BACKEND=CUDA \ - -DCMAKE_C_COMPILER=$(which gcc-6) \ - -DCMAKE_CXX_COMPILER=$(which g++-6) \ - -DCMAKE_CUDA_HOST_COMPILER=$(which g++-6) \ - -DCMAKE_Fortran_COMPILER=$(which gfortran-6) \ - -DAMReX_CUDA_ARCH=7.0 \ - -DAMReX_CUDA_ERROR_CROSS_EXECUTION_SPACE_CALL=ON - make -j 2 - # Build libamrex and all tests with CUDA 11.0.2 (recent supported) tests-cuda11: name: CUDA@11.2 GNU@9.3.0 C++17 Release [tests] runs-on: ubuntu-20.04 - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code"} + env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wnon-virtual-dtor -Wlogical-op -Wmisleading-indentation -Wduplicated-cond -Wduplicated-branches"} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies_nvcc11.sh - name: Build & Install @@ -64,9 +33,7 @@ jobs: -DCMAKE_CXX_COMPILER=$(which g++) \ -DCMAKE_CUDA_HOST_COMPILER=$(which g++) \ -DCMAKE_Fortran_COMPILER=$(which gfortran) \ - -DCMAKE_CUDA_STANDARD=17 \ - -DCMAKE_CXX_STANDARD=17 \ - -DAMReX_CUDA_ARCH=8.0 \ + -DAMReX_CUDA_ARCH=7.0 \ -DAMReX_CUDA_ERROR_CROSS_EXECUTION_SPACE_CALL=ON \ -DAMReX_CUDA_ERROR_CAPTURE_THIS=ON @@ -78,7 +45,7 @@ jobs: runs-on: ubuntu-20.04 env: {CXXFLAGS: "-Werror -Wall -Wextra -Wpedantic -Wshadow"} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies_nvhpc21-11.sh - name: Build & Install @@ -106,8 +73,6 @@ jobs: -DCMAKE_CXX_COMPILER=$(which nvc++) \ -DCMAKE_CUDA_HOST_COMPILER=$(which nvc++) \ -DCMAKE_Fortran_COMPILER=$(which nvfortran) \ - -DCMAKE_CUDA_STANDARD=17 \ - -DCMAKE_CXX_STANDARD=17 \ -DAMReX_CUDA_ARCH=8.0 \ -DAMReX_CUDA_ERROR_CROSS_EXECUTION_SPACE_CALL=ON \ -DAMReX_CUDA_ERROR_CAPTURE_THIS=ON @@ -119,12 +84,12 @@ jobs: name: CUDA@11.2 GNU@9.3.0 [configure 3D] runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies_nvcc11.sh - name: Build & Install run: | export PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH} ./configure --dim 3 --with-cuda yes --enable-eb yes --enable-xsdk-defaults yes --with-fortran no - make -j2 WARN_ALL=TRUE WARN_ERROR=TRUE XTRA_CXXFLAGS=-fno-operator-names CXXSTD=c++17 + make -j2 WARN_ALL=TRUE WARN_ERROR=TRUE XTRA_CXXFLAGS=-fno-operator-names make install diff --git a/.github/workflows/dependencies/dependencies.sh b/.github/workflows/dependencies/dependencies.sh index d0e86e99c0a..c9bb080831c 100755 --- 
a/.github/workflows/dependencies/dependencies.sh +++ b/.github/workflows/dependencies/dependencies.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2020 The AMReX Community +# Copyright 2020-2022 The AMReX Community # # License: BSD-3-Clause-LBNL # Authors: Axel Huebl diff --git a/.github/workflows/dependencies/dependencies_clang6.sh b/.github/workflows/dependencies/dependencies_clang7.sh similarity index 73% rename from .github/workflows/dependencies/dependencies_clang6.sh rename to .github/workflows/dependencies/dependencies_clang7.sh index 19b348b920b..85396a2f73c 100755 --- a/.github/workflows/dependencies/dependencies_clang6.sh +++ b/.github/workflows/dependencies/dependencies_clang7.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2020 The AMReX Community +# Copyright 2020-2022 The AMReX Community # # License: BSD-3-Clause-LBNL # Authors: Axel Huebl @@ -11,4 +11,4 @@ sudo apt-get update sudo apt-get install -y \ build-essential \ - clang gfortran + clang-7 gfortran diff --git a/.github/workflows/dependencies/dependencies_gcc8.sh b/.github/workflows/dependencies/dependencies_gcc8.sh new file mode 100755 index 00000000000..c216e6a8c51 --- /dev/null +++ b/.github/workflows/dependencies/dependencies_gcc8.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash +# +# Copyright 2020-2022 The AMReX Community +# +# License: BSD-3-Clause-LBNL +# Authors: Axel Huebl + +set -eu -o pipefail + +sudo add-apt-repository ppa:ubuntu-toolchain-r/test +sudo apt-get update + +sudo apt-get install -y --no-install-recommends \ + build-essential \ + g++-8 gfortran-8 \ + libopenmpi-dev \ + openmpi-bin diff --git a/.github/workflows/dependencies/dependencies_nofortran.sh b/.github/workflows/dependencies/dependencies_nofortran.sh index 36d759f66fa..61089ad8bf7 100755 --- a/.github/workflows/dependencies/dependencies_nofortran.sh +++ b/.github/workflows/dependencies/dependencies_nofortran.sh @@ -1,8 +1,9 @@ #!/usr/bin/env bash # -# Copyright 2020 Axel Huebl +# Copyright 2020-2022 The AMReX Community # # License: BSD-3-Clause-LBNL +# Authors: Axel Huebl # search recursive inside a folder if a file contains tabs # diff --git a/.github/workflows/dependencies/dependencies_nvcc10.sh b/.github/workflows/dependencies/dependencies_nvcc10.sh deleted file mode 100755 index 591dd04d79b..00000000000 --- a/.github/workflows/dependencies/dependencies_nvcc10.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env bash -# -# Copyright 2020 Axel Huebl -# -# License: BSD-3-Clause-LBNL - -set -eu -o pipefail - -sudo apt-get update - -sudo apt-get install -y --no-install-recommends\ - build-essential \ - g++-6 \ - gfortran-6 \ - libopenmpi-dev \ - openmpi-bin - -sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub -echo "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64 /" \ - | sudo tee /etc/apt/sources.list.d/cuda.list -sudo apt-get update -sudo apt-get install -y \ - cuda-command-line-tools-10-2 \ - cuda-compiler-10-2 \ - cuda-cupti-dev-10-2 \ - cuda-minimal-build-10-2 \ - cuda-nvml-dev-10-2 \ - cuda-nvtx-10-2 \ - cuda-curand-dev-10-2 -sudo ln -s cuda-10.2 /usr/local/cuda diff --git a/.github/workflows/dependencies/dependencies_nvcc11.sh b/.github/workflows/dependencies/dependencies_nvcc11.sh index 79c8c6c31f6..a4b2f335a99 100755 --- a/.github/workflows/dependencies/dependencies_nvcc11.sh +++ b/.github/workflows/dependencies/dependencies_nvcc11.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2020 Axel Huebl +# Copyright 2020-2022 Axel Huebl 
# # License: BSD-3-Clause-LBNL @@ -19,9 +19,8 @@ sudo apt-get install -y \ pkg-config \ wget -sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub -echo "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64 /" \ - | sudo tee /etc/apt/sources.list.d/cuda.list +curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.0-1_all.deb +sudo dpkg -i cuda-keyring_1.0-1_all.deb sudo apt-get update sudo apt-get install -y \ cuda-command-line-tools-11-2 \ diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index c0d50aa99e1..82e387cbff4 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v2.3.1 # If you're using actions/checkout@v2 you must set persist-credentials to false in most cases for the deployment to work correctly. + uses: actions/checkout@v3 with: persist-credentials: false diff --git a/.github/workflows/gcc.yml b/.github/workflows/gcc.yml index 188d7d32f95..32726a4767a 100644 --- a/.github/workflows/gcc.yml +++ b/.github/workflows/gcc.yml @@ -13,13 +13,13 @@ jobs: # Build and install libamrex as AMReX CMake project # Note: this is an intentional "minimal" build that does not enable (many) options library: - name: GNU@7.5 C++17 Release [lib] - runs-on: ubuntu-18.04 - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual"} + name: GNU@8.4 C++17 Release [lib] + runs-on: ubuntu-20.04 + env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wnon-virtual-dtor -Wlogical-op -Wmisleading-indentation -Wduplicated-cond -Wduplicated-branches"} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies - run: .github/workflows/dependencies/dependencies.sh + run: .github/workflows/dependencies/dependencies_gcc8.sh - name: Build & Install run: | mkdir build @@ -29,7 +29,9 @@ jobs: -DAMReX_PLOTFILE_TOOLS=ON \ -DCMAKE_VERBOSE_MAKEFILE=ON \ -DCMAKE_INSTALL_PREFIX=/tmp/my-amrex \ - -DCMAKE_CXX_STANDARD=17 + -DCMAKE_C_COMPILER=$(which gcc-8) \ + -DCMAKE_CXX_COMPILER=$(which g++-8) \ + -DCMAKE_Fortran_COMPILER=$(which gfortran-8) make -j 2 make install make test_install @@ -41,12 +43,12 @@ jobs: # Build libamrex and all tests tests_build_3D: - name: GNU@7.5 C++14 3D Debug Fortran [tests] - runs-on: ubuntu-18.04 - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -O1"} + name: GNU@9.3 C++17 3D Debug Fortran [tests] + runs-on: ubuntu-20.04 + env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -O1 -Wnon-virtual-dtor -Wlogical-op -Wmisleading-indentation -Wduplicated-cond -Wduplicated-branches"} # It's too slow with -O0 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies.sh - name: Build & Install @@ -64,12 +66,12 @@ jobs: ctest --test-dir build --output-on-failure tests_build_2D: - name: GNU@7.5 C++14 2D Debug Fortran [tests] - runs-on: ubuntu-18.04 - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -O1"} + name: 
GNU@9.3 C++17 2D Debug Fortran [tests] + runs-on: ubuntu-20.04 + env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -O1 -Wnon-virtual-dtor -Wlogical-op -Wmisleading-indentation -Wduplicated-cond -Wduplicated-branches"} # It's too slow with -O0 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies.sh - name: Build & Install @@ -87,13 +89,13 @@ jobs: ctest --test-dir build --output-on-failure tests_build_1D: - name: GNU@7.5 C++14 1D Debug Fortran [tests] - runs-on: ubuntu-18.04 - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -O1"} + name: GNU@9.3 C++17 1D Debug Fortran [tests] + runs-on: ubuntu-20.04 + env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -O1 -Wnon-virtual-dtor -Wlogical-op -Wmisleading-indentation -Wduplicated-cond -Wduplicated-branches"} # -Werror temporarily skipped until we have functional testing established # It's too slow with -O0 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies.sh - name: Build & Install @@ -113,10 +115,10 @@ jobs: # Build libamrex and all tests tests_cxx20: name: GNU@10.1 C++20 OMP [tests] - runs-on: ubuntu-18.04 - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi"} + runs-on: ubuntu-20.04 + env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wnon-virtual-dtor -Wlogical-op -Wmisleading-indentation -Wduplicated-cond -Wduplicated-branches"} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies_gcc10.sh - name: Build & Install @@ -145,13 +147,13 @@ jobs: # Build libamrex and all tests w/o MPI tests-nonmpi: - name: GNU@7.5 C++14 NOMPI [tests] - runs-on: ubuntu-18.04 - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual"} + name: GNU@8.4 C++17 NOMPI [tests] + runs-on: ubuntu-20.04 + env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wnon-virtual-dtor -Wlogical-op -Wmisleading-indentation -Wduplicated-cond -Wduplicated-branches"} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies - run: .github/workflows/dependencies/dependencies.sh + run: .github/workflows/dependencies/dependencies_gcc8.sh - name: Build & Install run: | mkdir build @@ -167,18 +169,21 @@ jobs: -DAMReX_ENABLE_TESTS=ON \ -DAMReX_FORTRAN=ON \ -DAMReX_MPI=OFF \ - -DAMReX_PARTICLES=ON + -DAMReX_PARTICLES=ON \ + -DCMAKE_C_COMPILER=$(which gcc-8) \ + -DCMAKE_CXX_COMPILER=$(which g++-8) \ + -DCMAKE_Fortran_COMPILER=$(which gfortran-8) make -j 2 ctest --output-on-failure # Build libamrex and all tests tests-nofortran: - name: GNU@7.5 C++14 w/o Fortran [tests] - runs-on: ubuntu-18.04 - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wunreachable-code"} + name: GNU@9.3 C++17 w/o Fortran [tests] + 
runs-on: ubuntu-20.04 + env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wunreachable-code -Wnon-virtual-dtor -Wlogical-op -Wmisleading-indentation -Wduplicated-cond -Wduplicated-branches"} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies_nofortran.sh - name: Build & Install @@ -203,10 +208,10 @@ jobs: # Build 1D libamrex with configure configure-1d: - name: GNU@7.5 Release [configure 1D] - runs-on: ubuntu-18.04 + name: GNU@9.3 Release [configure 1D] + runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies.sh - name: Build & Install @@ -217,10 +222,10 @@ jobs: # Build 3D libamrex with configure configure-3d: - name: GNU@7.5 Release [configure 3D] - runs-on: ubuntu-18.04 + name: GNU@11.2 Release [configure 3D] + runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies.sh - name: Build & Install @@ -231,10 +236,10 @@ jobs: # Build 3D libamrex with single precision and tiny profiler configure-3d-single-tprof: - name: GNU@7.5 Release [configure 3D] - runs-on: ubuntu-18.04 + name: GNU@9.3 Release [configure 3D] + runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies.sh - name: Build & Install @@ -245,10 +250,10 @@ jobs: # Build 3D libamrex debug omp build with configure configure-3d-omp-debug: - name: GNU@7.5 OMP Debug [configure 3D] - runs-on: ubuntu-18.04 + name: GNU@9.3 OMP Debug [configure 3D] + runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies.sh - name: Build & Install @@ -260,9 +265,9 @@ jobs: # Build Tools/Plotfile plotfile-tools: name: GNU Plotfile Tools [tools] - runs-on: ubuntu-18.04 + runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies.sh - name: Build & Install @@ -272,11 +277,11 @@ jobs: # Build libamrex and run all tests tests_run: - name: GNU@7.5 C++14 [tests] - runs-on: ubuntu-18.04 - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wunreachable-code"} + name: GNU@9.3 C++17 [tests] + runs-on: ubuntu-20.04 + env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wunreachable-code -Wnon-virtual-dtor -Wlogical-op -Wmisleading-indentation -Wduplicated-cond -Wduplicated-branches"} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies.sh - name: Build & Install @@ -295,13 +300,13 @@ jobs: ctest --output-on-failure -R test_hdf5: - name: GNU@7.5 HDF5 I/O Test [tests] - runs-on: ubuntu-18.04 + name: GNU@9.3 HDF5 I/O Test [tests] + runs-on: ubuntu-20.04 env: CXX: h5pcc CC: h5cc steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: | .github/workflows/dependencies/dependencies.sh diff --git a/.github/workflows/hip.yml b/.github/workflows/hip.yml index a128eabf664..d542fb603a2 100644 --- 
a/.github/workflows/hip.yml +++ b/.github/workflows/hip.yml @@ -20,9 +20,9 @@ jobs: # ^ # /opt/rocm-4.1.1/hip/include/hip/hcc_detail/hip_runtime.h:176:9: note: macro 'select_impl_' defined here # #define select_impl_(_1, _2, impl_, ...) impl_ - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wno-deprecated-declarations -Wno-gnu-zero-variadic-macro-arguments -Wno-pass-failed"} + env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wnon-virtual-dtor -Wno-deprecated-declarations -Wno-gnu-zero-variadic-macro-arguments"} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies_hip.sh - name: Build & Install @@ -47,6 +47,7 @@ jobs: -DAMReX_LINEAR_SOLVERS=ON \ -DAMReX_GPU_BACKEND=HIP \ -DAMReX_AMD_ARCH=gfx908 \ + -DAMReX_ROCTX=ON \ -DCMAKE_C_COMPILER=$(which clang) \ -DCMAKE_CXX_COMPILER=$(which clang++) \ -DCMAKE_Fortran_COMPILER=$(which flang) \ @@ -66,9 +67,9 @@ jobs: # ^ # /opt/rocm-4.1.1/hip/include/hip/hcc_detail/hip_runtime.h:176:9: note: macro 'select_impl_' defined here # #define select_impl_(_1, _2, impl_, ...) impl_ - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wno-deprecated-declarations -Wno-gnu-zero-variadic-macro-arguments -Wno-pass-failed"} + env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wnon-virtual-dtor -Wno-deprecated-declarations -Wno-gnu-zero-variadic-macro-arguments"} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies_hip.sh - name: Build & Install @@ -91,6 +92,7 @@ jobs: -DAMReX_LINEAR_SOLVERS=ON \ -DAMReX_GPU_BACKEND=HIP \ -DAMReX_AMD_ARCH=gfx908 \ + -DAMReX_ROCTX=ON \ -DCMAKE_C_COMPILER=$(which clang) \ -DCMAKE_CXX_COMPILER=$(which hipcc) \ -DCMAKE_Fortran_COMPILER=$(which gfortran) \ @@ -102,7 +104,7 @@ jobs: name: HIP EB [configure 2D] runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies_hip.sh - name: Build & Install diff --git a/.github/workflows/intel.yml b/.github/workflows/intel.yml index 6fef4fc0459..6e7d87a299e 100644 --- a/.github/workflows/intel.yml +++ b/.github/workflows/intel.yml @@ -11,9 +11,9 @@ jobs: name: DPCPP GFortran@7.5 C++17 [tests] runs-on: ubuntu-20.04 # mkl/rng/device/detail/mrg32k3a_impl.hpp has a number of sign-compare error - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wno-sign-compare"} + env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wnon-virtual-dtor -Wno-sign-compare"} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies_dpcpp.sh - name: Build & Install @@ -41,7 +41,7 @@ jobs: runs-on: ubuntu-20.04 env: {CXXFLAGS: "-Werror"} steps: - - uses: actions/checkout@v2 
+ - uses: actions/checkout@v3 - name: install dependencies run: | export DEBIAN_FRONTEND=noninteractive diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index e1446a038da..be5a1e738ca 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -14,10 +14,10 @@ jobs: env: # build universal binaries for M1 "Apple Silicon" and Intel CPUs CMAKE_OSX_ARCHITECTURES: "arm64;x86_64" - CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wno-c++17-extensions -Wno-range-loop-analysis -Wno-pass-failed" + CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wnon-virtual-dtor -Wno-c++17-extensions -Wno-range-loop-analysis" # -Wno-range-loop-analysis: Apple clang has a bug in range-loop-analysis steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies_mac.sh - name: Build & Install @@ -39,10 +39,10 @@ jobs: name: AppleClang@11.0 GFortran@9.3 [tests] runs-on: macos-latest env: - CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wno-c++17-extensions -Wno-range-loop-analysis -Wno-pass-failed" + CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wnon-virtual-dtor -Wno-c++17-extensions -Wno-range-loop-analysis" # -Wno-range-loop-analysis: Apple clang has a bug in range-loop-analysis steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies_mac.sh - name: Build & Install diff --git a/.github/workflows/sensei.yml b/.github/workflows/sensei.yml index 19121889d6b..163456a924a 100644 --- a/.github/workflows/sensei.yml +++ b/.github/workflows/sensei.yml @@ -17,17 +17,17 @@ jobs: CC: clang CXXFLAGS: "-Werror -Wshadow -Woverloaded-virtual -Wunreachable-code -fno-operator-names" CMAKE_GENERATOR: Ninja - CMAKE_PREFIX_PATH: /root/install/sensei/develop/lib/cmake + CMAKE_PREFIX_PATH: /root/install/sensei/v4.0.0/lib64/cmake container: - image: ryankrattiger/sensei:fedora33-vtk-mpi-20210616 + image: senseiinsitu/ci:fedora35-amrex-20220613 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Setup run: mkdir build - name: Configure run: | cd build - cmake .. \ + cmake .. 
\ -DCMAKE_BUILD_TYPE=Debug \ -DAMReX_ENABLE_TESTS=ON \ -DAMReX_FORTRAN=OFF \ diff --git a/.github/workflows/style.yml b/.github/workflows/style.yml index b459865f587..9c32554218d 100644 --- a/.github/workflows/style.yml +++ b/.github/workflows/style.yml @@ -10,13 +10,13 @@ jobs: tabs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Tabs run: .github/workflows/style/check_tabs.sh trailing_whitespaces: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Trailing Whitespaces run: .github/workflows/style/check_trailing_whitespaces.sh diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index b066ba6c98c..fba862d26dd 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -12,7 +12,7 @@ jobs: name: MSVC C++17 w/o Fortran w/o MPI runs-on: windows-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Build & Install run: | cmake -S . -B build ` @@ -31,7 +31,7 @@ jobs: name: MSVC C++17 w/o Fortran w/o MPI static runs-on: windows-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Build & Install run: | cmake -S . -B build ` @@ -49,7 +49,7 @@ jobs: name: Clang C++17 w/o Fortran w/o MPI runs-on: windows-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: seanmiddleditch/gha-setup-ninja@master - name: Build & Install shell: cmd diff --git a/CHANGES b/CHANGES index 8104566abe2..648db385c07 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,236 @@ +# 22.11 + + -- MPI Reduce for ValLocPair (#3003) + + -- `FabArray::isDefined` (#2997) + + -- Make The_Device_Arena non-managed (#2998) + + -- Add alias template Gpu::NonManagedDeviceVector (#2999) + + -- Pre- and Post-interpolation hook interface (#2991) + + -- Add user defined BC types (#2995) + + -- Add BCRec::set for convenience (#2993) + + -- ParallelFor with compile time optimization of kernels with run time parameters (#2954) + + -- 2D RZ solver for WarpX: Arbitrary coefficient (#2986) + + -- Runge-Kutta support for AMR (#2974) + + -- Fourth-order interpolation from fine to coarse level (#2987) + + -- Fix EB data inconsistency when fixing small cells and multiple cuts (#2943) + + -- MFIter::Finalize (#2983, #2985, #2988) + + -- Fix MLMG::getGradSolution & getFluxes for inhomogeneous Neumann and Robin BC (#2984) + + -- MLLinOp::postSolve (#2981) + + -- add templating for the cell bilinear interpolators (#2979) + + -- FillPatcher class (#2972) + + -- Remove sycl namespace alias (#2971) + + -- Fix Tensor Solver BC (#2930) + + -- Disable host device for macros for SYCL/DPC++ (#2969) + +# 22.10 + + -- Solve an issue with particles async IO when having runtime added variables (#2966) + + -- Fix int overflow in amrex::bisect (#2964) + + -- Fix MLEBNodeFDLaplacian bottom solver (#2963) + + -- make tagging routines EB_aware (#2962) + + -- Volume weighted sum (#2961) + + -- CellData: data in a single cell (#2959) + + -- Quartic interpolation for cell centered data (#2960) + + -- Add GPU-compatible upper bound and lower bound algorithms to AMReX_Algorithm (#2958) + + -- add option for makebuildsources to specify the style arguments for 'git describe'. 
(#2957) + + -- Add roundoff_lo corresponding to roundoff_hi for domains that don't start at 0 (#2950) + + -- Add template parameter to ParallelFor and launch specifying block size (#2947) + + -- Byte spread fixes (#2949) + + -- CMake: HIP_PATH from ROCM_PATH (#2948) + + -- Fix: Make Finalize->Initialize->F->I->... Work (#2944) + + -- Changes for Cray & Clang (#2941) + + -- Link to cublas when using CUDA and Hypre (#2933) + + -- HIP: use coarse grained host memory (#2932) + + -- EB checkpoint files (#2897) + + -- Fix: Loading Files Again (#2936) + + -- Check if boundary particles container has been created before clearance. (#2935) + + -- SYCL: Replace deprecated atomic types and operations (#2921) + +# 22.09 + + -- Preserve neighbor particles when sorting particles. (#2923) + + -- Scope of NonLocalBC::ParallelCopy (#2922) + + -- Open Boundary Poisson Solver (#2912) + Add hypre as an option for OpenBCSolver (#2931) + + -- Fix OOB access of ref ratio on HDF write header (#2919) + + -- Add Polaris to GNUMake (#2908) + + -- Export GpuDevice Globals (#2918) + + -- enable LinOp to use the right Factory (fixes moving geometry problem) (#2916) + + -- Use 1 atomic instead of two per item in DenseBins::build (#2911) + + -- [SYCL] Remove amrex::oneapi and update deprecated device descriptors (#2910) + + -- Add: `MultiFab::sum_unique` (#2909) + + -- In MLMG::mgFcycle, assert that for EB the linop is cell-centered. (#2905) + + -- EB: Add Fine Levels (#2881) + + -- Add rpath to lib64 for ZFP. (#2902) + + -- change data types from double to amrex::Real, and thus we can use single precision for the hypre IJ interface (#2896) + + -- MPMD Support (#2895) + + -- MLMG interface (#2858) + +# 22.08 + + -- Let `selectActualNeighbors` return right after starting if there are no + particles for communication. (#2886) + + -- Add Comm Sync to Redistribute (#2891) + + -- Multi-materials and derived variable output (#2888) + + -- Fix host / device sync bug in PODVector (#2890) + + -- MinLoc and MaxLoc Support (#2885) + + -- HIP: Remove the call to hipDeviceSetSharedMemConfig (#2884) + + -- Add Frontier to GNU Make (#2879) + + -- Add option to derefine to AMRErrorTag (#2875) + + -- Fix the segmentation fault in selecting actual neighbor particles. (#2877) + + -- Workaround to bypass issue observed at very large scale with Fujitsu MPI (#2874) + `TagBoxArray::collate`: Fujitsu Clang (#2889) + + -- Allow zero components MultiFab and BaseFab (#2873) + + -- New EB optimization parameter: eb2.num_coarsen_opt (#2872) + + -- SENSEI 4.0: Fix Build for Particles (#2869) + + -- Cache the neighbor comm tags for the CPU implementation of fillNeighbors. (#2862) + + -- Remove some hard checks in check_mvmc for 3D (#2864) + + -- Carry over fix for ngbxy.smallEnd typo (#2868) + +# 22.07 + + -- Adding control APIs and namespacing for core algorithm paths like SpGEMM, SpMV, and SpTrans. (#2859) + + -- update the SENSEI in situ coupling for SENSEI v4.0.0 (#2785) + + -- Write runtime attribs to checkpoints on GPUs (#2856) + + -- Fix gnu make on Crusher for mpi_gtl_hsa (#2857) + + -- CMake: FindDependency CUDAToolkit (#2849) + + -- NERSC Programming Environment prototype (#2848) + + -- GNU Make: No need to query mpif90 if Fortran is not used. 
(#2852) + + -- Remove f90doc (#2851) + + -- Explicitly invoke python3 (#2850) + + -- Maintain the high end of the 'roundoff domain' in both float and double precision (#2839) + + -- add Ok to coordsys (#2844) + + -- ParamParse: Add Files at Runtime (#2842) + + -- Fix a pathological case for 2d EB (#2840) + + -- add fvolumesum to GNUmakefile (#2836) + + -- Clamp particles shifted from plo boundary against rhi, rather than back to plo (#2814) + + -- Fix: CMake NVTX not only Hypre (#2837) + + -- Update sensei CI container for sensei v4.0 integration (#2834) + + -- HIP Memory Advise : Set managed memory to coarse grain (#2835) + + -- CMake: Fix `export` with `AMReX_INSTALL=OFF` (#2838) + + -- make PODVector work with PolymorphicArenaAllocator (#2829) + + -- Re-implement FaceLinear::interp() for InterpFromCoarseLevel (#2831) + + -- Make regrid method of Amr class public (#2833) + + -- amrex::Any (#2827) + + -- Fix line integral computation (#2830) + + -- Fix a bug in multigrid grids (#2823) + + -- Add html, additional sections to README.md (#2775) + + -- Allow StateDataPhysBCFunct to operate on face-centered data (#2819) + + -- Fix Parser ODR (#2820) + + -- CMake: Cleanup old nvToolsExt (#2817) + + -- Handle the case where we don't have enough device memory for the snd_buffer (#2705) + + -- CMake: 3.17+ (#2813) + + -- Landon/fix bug ghost particles (#2812) + + -- Follow-on to 2809; update selectActualNeighbors as well. (#2810) + + -- Generalize the type of callables that can be passed into the neighbor list build function (#2809) + + -- Add AVX2 instructions flag. (#2803) + + -- Avoid M_PI because it's not in the C++ standard (#2807) + + -- In the array version of FillPatchTwoLevels, allow specifying an (#2800) + # 22.06 -- Fix solvability issue in the nodal solver RAP approach (#2783, #2801) diff --git a/Docs/sphinx_documentation/source/Basics.rst index dd4e53d455e..dc3022f7e12 100644 --- a/Docs/sphinx_documentation/source/Basics.rst +++ b/Docs/sphinx_documentation/source/Basics.rst @@ -2549,7 +2549,11 @@ The basic idea behind physical boundary conditions is as follows: Reflection from interior cells with sign changed, :math:`q(-i) = -q(i)`. -- For external Dirichlet boundaries, the user needs to provide a + user_1, user_2 and user_3 + "User". It is the user's responsibility to write a routine + to fill ghost cells (more details below). + +- For external Dirichlet and user boundaries, the user needs to provide a callable object like below. .. highlight:: c++ @@ -2564,7 +2568,7 @@ The basic idea behind physical boundary conditions is as follows: const BCRec* bcr, const int bcomp, const int orig_comp) const { - // external Dirichlet for cell iv + // external Dirichlet or user BC for cell iv } };
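To make the new user_1/user_2/user_3 boundary types concrete, here is a minimal sketch of such a fill functor (hypothetical; the signature follows the snippet in the hunk above, and the constant inflow value is purely illustrative)::

    struct MyBCFill
    {
        AMREX_GPU_DEVICE
        void operator() (const amrex::IntVect& iv,
                         amrex::Array4<amrex::Real> const& dest,
                         const int dcomp, const int numcomp,
                         amrex::GeometryData const& geom,
                         const amrex::Real time,
                         const amrex::BCRec* bcr, const int bcomp,
                         const int orig_comp) const
        {
            amrex::ignore_unused(geom, time, bcr, bcomp, orig_comp);
            // Hypothetical: fill every requested ghost component with a fixed value.
            for (int n = dcomp; n < dcomp+numcomp; ++n) {
                dest(iv, n) = amrex::Real(1.0);
            }
        }
    };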
diff --git a/Docs/sphinx_documentation/source/BuildingAMReX.rst index 7b3273bf874..331f9b8c9f6 100644 --- a/Docs/sphinx_documentation/source/BuildingAMReX.rst +++ b/Docs/sphinx_documentation/source/BuildingAMReX.rst @@ -35,8 +35,8 @@ list of important variables. +-----------------+-------------------------------------+--------------------+ | COMP | gnu, cray, ibm, intel, llvm, or pgi | none | +-----------------+-------------------------------------+--------------------+ - | CXXSTD | C++ standard (``c++14``, ``c++17``, | compiler default, | - | | ``c++20``) | at least ``c++14`` | + | CXXSTD | C++ standard (``c++17``, ``c++20``) | compiler default, | + | | | at least ``c++17`` | +-----------------+-------------------------------------+--------------------+ | DEBUG | TRUE or FALSE | FALSE | +-----------------+-------------------------------------+--------------------+ @@ -584,7 +584,7 @@ the following line in the appropriate CMakeLists.txt file: :: - target_link_libraries( <target> AMReX::<component> ) + target_link_libraries( <target> PUBLIC AMReX::<component> ) In the above snippet, ``<component>`` is any of the targets listed in the table below. @@ -709,7 +709,7 @@ As an example, consider the following CMake code: :: find_package(AMReX REQUIRED 3D EB) - target_link_libraries( Foo AMReX::amrex AMReX::Flags_CXX ) + target_link_libraries( Foo PUBLIC AMReX::amrex ) The code in the snippet above checks whether an AMReX installation with 3D and Embedded Boundary support is available on the system. If so, AMReX is linked to target ``Foo`` and AMReX flags preset is used @@ -740,8 +740,8 @@ The AMReX team does development on Linux machines, from laptops to supercomputer We do not officially support AMReX on Windows, and many of us do not have access to any Windows machines. However, we believe there are no fundamental issues for it to work on Windows. -(1) AMReX mostly uses standard C++14, but for Windows C++17 is required. This is because we use - C++17 to support file system operations when POSIX I/O is not available. +(1) AMReX mostly uses standard C++17. +We run continuous integration tests on Windows with MSVC and Clang compilers. (2) We use POSIX signal handling when floating point exceptions, segmentation faults, etc. happen. This capability is not supported on Windows. diff --git a/Docs/sphinx_documentation/source/BuildingAMReX_Chapter.rst index dd61bb254d3..3ecbc775c17 100644 --- a/Docs/sphinx_documentation/source/BuildingAMReX_Chapter.rst +++ b/Docs/sphinx_documentation/source/BuildingAMReX_Chapter.rst @@ -18,7 +18,7 @@ an application code then uses its own build system and links to AMReX as an exte Finally, AMReX can also be built with CMake, as detailed in the section on :ref:`sec:build:cmake`. -AMReX requires a C++ compiler that supports the C++14 standard, a +AMReX requires a C++ compiler that supports the C++17 standard, a Fortran compiler that supports the Fortran 2003 standard, and a C compiler that supports the C99 standard. Prerequisites for building with GNU Make include Python (>= 2.7, including 3) and standard tools diff --git a/Docs/sphinx_documentation/source/GPU.rst index 4101c806be2..4984b839132 100644 --- a/Docs/sphinx_documentation/source/GPU.rst +++ b/Docs/sphinx_documentation/source/GPU.rst @@ -315,7 +315,7 @@ we provide the helper function ``setup_target_for_cuda_compilation()``: setup_target_for_cuda_compilation(my_target) # Link against amrex - target_link_libraries(my_target AMReX::amrex) + target_link_libraries(my_target PUBLIC AMReX::amrex) @@ -1001,7 +1001,7 @@ launch function. ``amrex::ParallelFor()`` expands into different variations of a quadruply-nested :cpp:`for` loop depending on dimensionality and whether it is being implemented on CPU or GPU.
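For orientation before the simplified implementation reproduced below, a typical call site looks like this (a minimal sketch; ``mf``, ``mfi``, ``bx``, and ``ncomp`` are illustrative names)::

    amrex::Array4<amrex::Real> const& fab = mf.array(mfi);
    amrex::ParallelFor(bx, ncomp,
        [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept
        {
            // The same body runs as nested loops on the CPU
            // or as one GPU thread per (i,j,k,n) cell.
            fab(i,j,k,n) *= amrex::Real(2.0);
        });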
-The best way to understand this macro is to take a look at the 4D :cpp:`amrex::ParallelFor` +The best way to understand this function is to take a look at the 4D :cpp:`amrex::ParallelFor` that is implemented when ``USE_CUDA=FALSE``. A simplified version is reproduced here: .. highlight:: c++ @@ -1103,6 +1103,15 @@ bounds, a :cpp:`long` or :cpp:`int` number of elements is passed to bound the si passing the number of elements to work on and indexing the pointer to the starting element: :cpp:`p[idx + 15]`. +GPU block size +-------------- + +By default, :cpp:`ParallelFor` launches ``AMREX_GPU_MAX_THREADS`` threads +per GPU block, where ``AMREX_GPU_MAX_THREADS`` is a compile-time constant +with a default value of 256. Users can also explicitly specify the +number of threads per block with :cpp:`ParallelFor<MY_BLOCK_SIZE>(...)`, where +``MY_BLOCK_SIZE`` is a multiple of the warp size (e.g., 128). This allows +performance tuning of individual kernels. Launching general kernels ------------------------- diff --git a/Docs/sphinx_documentation/source/LinearSolvers.rst index c8743a3e8e2..d893859e7c2 100644 --- a/Docs/sphinx_documentation/source/LinearSolvers.rst +++ b/Docs/sphinx_documentation/source/LinearSolvers.rst @@ -209,8 +209,8 @@ function :: - void setDomainBC (const Array& lobc, // for lower ends - const Array& hibc); // for higher ends + void setDomainBC (const Array& lobc, // for lower ends + const Array& hibc); // for higher ends The supported BC types at the physical domain boundaries are @@ -222,6 +222,8 @@ The supported BC types at the physical domain boundaries are - :cpp:`LinOpBCType::inhomogNeumann` for inhomogeneous Neumann boundary condition. +- :cpp:`LinOpBCType::Robin` for Robin boundary conditions, :math:`a\phi + b\frac{\partial\phi}{\partial n} = f`. + - :cpp:`LinOpBCType::reflect_odd` for reflection with sign changed. 2) Cell-centered solvers only: @@ -255,12 +257,12 @@ before the solve one must always call the :cpp:`MLLinOp` member function :: virtual void setLevelBC (int amrlev, const MultiFab* levelbcdata, - const MultiFab* robinbc_a, - const MultiFab* robinbc_b, - const MultiFab* robinbc_f) = 0; + const MultiFab* robinbc_a = nullptr, + const MultiFab* robinbc_b = nullptr, + const MultiFab* robinbc_f = nullptr) = 0; -If we want to supply an inhomogeneous Dirichlet, inhomogeneous Neumann, or -Robin boundary conditions at the domain boundaries, we must supply those values +If we want to supply an inhomogeneous Dirichlet or inhomogeneous Neumann +boundary condition at the domain boundaries, we must supply those values in ``MultiFab* levelbcdata``, which must have at least one ghost cell. Note that the argument :cpp:`amrlev` is relative to the solve, not necessarily the full AMR hierarchy; amrlev = 0 refers to the coarsest @@ -286,6 +288,11 @@ Dirichlet or Neumann boundaries are assumed to be exactly on the face of the physical domain; storing these values in the ghost cell of a cell-centered array is a convenience of implementation. +For Robin boundary conditions, the ghost cells in +``MultiFab* robinbc_a``, ``MultiFab* robinbc_b``, and ``MultiFab* robinbc_f`` +store the numerical values in the condition, +:math:`a\phi + b\frac{\partial\phi}{\partial n} = f`. + .. _sec:linearsolver:pars: Parameters @@ -754,4 +761,3 @@ An example (implemented in the ``MultiComponent`` tutorial) might be: See ``amrex-tutorials/ExampleCodes/LinearSolvers/MultiComponent`` for a complete working example.
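A minimal sketch of the Robin boundary setup documented above, assuming a cell-centered operator ``mlabec`` (e.g., an :cpp:`MLABecLaplacian`) and MultiFabs ``robin_a``, ``robin_b``, ``robin_f`` whose ghost cells already hold the coefficients of :math:`a\phi + b\frac{\partial\phi}{\partial n} = f` (all names illustrative)::

    mlabec.setDomainBC({AMREX_D_DECL(amrex::LinOpBCType::Robin,
                                     amrex::LinOpBCType::Periodic,
                                     amrex::LinOpBCType::Periodic)},
                       {AMREX_D_DECL(amrex::LinOpBCType::Robin,
                                     amrex::LinOpBCType::Periodic,
                                     amrex::LinOpBCType::Periodic)});
    // Dirichlet/Neumann values would travel in the levelbcdata argument;
    // the Robin coefficients are read from the ghost cells of robin_a/b/f.
    mlabec.setLevelBC(0, &solution, &robin_a, &robin_b, &robin_f);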
.. solver reuse - diff --git a/Docs/sphinx_documentation/source/Post_Processing.rst index c2cce7fd7b2..fd707f221db 100644 --- a/Docs/sphinx_documentation/source/Post_Processing.rst +++ b/Docs/sphinx_documentation/source/Post_Processing.rst @@ -76,8 +76,8 @@ variable. **How to build and run** -In ``amrex/Tools/Plotfile``, type ``make`` and then ``./fextract.gnu.ex`` to run. -Typing ``./fextract.gnu.ex`` without inputs will bring up usage and options. +In ``amrex/Tools/Plotfile``, type ``make`` and then ``./fcompare.gnu.ex`` to run. +Typing ``./fcompare.gnu.ex`` without inputs will bring up usage and options. **Example** diff --git a/Docs/sphinx_documentation/source/SWFFT.rst index 3e886dcc2a8..9e6192ff048 100644 --- a/Docs/sphinx_documentation/source/SWFFT.rst +++ b/Docs/sphinx_documentation/source/SWFFT.rst @@ -98,7 +98,7 @@ AMReX contains two SWFFT tutorials, `SWFFT Poisson`_ and `SWFFT Simple`_: .. _`SWFFT Simple`: https://amrex-codes.github.io/amrex/tutorials_html/SWFFT_Tutorial.html#swfft-simple .. [1] - https://xgitlab.cels.anl.gov/hacc/SWFFT + https://git.cels.anl.gov/hacc/SWFFT .. [2] SWFFT source code directory in AMReX: amrex/Src/Extern/SWFFT diff --git a/Docs/sphinx_documentation/source/Testing.rst index b7e32c9477b..bbceae1d1ad 100644 --- a/Docs/sphinx_documentation/source/Testing.rst +++ b/Docs/sphinx_documentation/source/Testing.rst @@ -18,6 +18,7 @@ application codes that use it as a framework. We use an in-house test runner scr operation, originally developed by Michael Zingale for the Castro code, and later expanded to other application codes as well. The results for each night are collected and stored on a web page; see https://ccse.lbl.gov/pub/RegressionTesting/ for the latest set of results. +The runtime option ``amrex.abort_on_unused_inputs`` (``0`` or ``1``; default is ``0`` for false) is useful for keeping tests up to date with API changes: it aborts the application after the test run if any unused input parameters were detected. Running the test suite locally ============================== @@ -73,7 +74,7 @@ re-run the script without the :cpp:`--make_benchmarks` option: :: - python regtest.py --make_benchmarks 'generating initial benchmarks' AMReX-tests.ini + python regtest.py AMReX-tests.ini The script will generate a set of html pages in the directory specified in your :cpp:`AMReX-tests.ini` file that you can examine using the browser of your choice. diff --git a/Docs/sphinx_documentation/source/Visualization.rst index ea8b4ab8c0b..59f95f76090 100644 --- a/Docs/sphinx_documentation/source/Visualization.rst +++ b/Docs/sphinx_documentation/source/Visualization.rst @@ -873,9 +873,12 @@ and point to the CMake configuration installed with SENSEI. .. code-block:: bash - cmake -DAMReX_SENSEI=ON -DSENSEI_DIR=<sensei install prefix>/lib/cmake .. + cmake -DAMReX_SENSEI=ON -DSENSEI_DIR=<sensei install prefix>/<lib>/cmake .. -When CMake generates the make files proceed as usual. +When CMake generates the make files proceed as usual. Note: ``<lib>`` may be +`lib` or `lib64` or something else depending on what CMake decided to use for +your particular OS. See the CMake GNUInstallDirs documentation for more +information. .. code-block:: bash @@ -952,8 +955,7 @@ dataset.
Obtaining SENSEI ----------------- -SENSEI is hosted on Kitware's Gitlab site at https://gitlab.kitware.com/sensei/sensei -It's best to checkout the latest release rather than working on the master branch. +SENSEI is hosted on github at https://github.com/SENSEI-insitu/SENSEI.git To ease the burden of wrangling back end installs SENSEI provides two platforms with all dependencies pre-installed, a VirtualBox VM, and a NERSC Cori diff --git a/GNUmakefile.in b/GNUmakefile.in index 8a6ce69df09..ad6238543dc 100644 --- a/GNUmakefile.in +++ b/GNUmakefile.in @@ -19,6 +19,9 @@ ifeq ($(USE_FORTRAN_INTERFACE),TRUE) endif ifeq ($(USE_LINEAR_SOLVERS),TRUE) Pdirs += LinearSolvers/MLMG + ifeq ($(DIM),3) + Pdirs += LinearSolvers/OpenBC + endif ifeq ($(USE_FORTRAN_INTERFACE),TRUE) Pdirs += F_Interfaces/LinearSolvers endif diff --git a/INSTALL b/INSTALL index efb40fbdb2e..ed1e0dfb36e 100644 --- a/INSTALL +++ b/INSTALL @@ -10,7 +10,7 @@ There are three ways to use AMReX. Fortran modules via `./configure` followed by `make` and `make install`. Type `./configure -h` to show help message. An application code uses its build system to compile and link to the - AMReX library. Because AMReX uses C++14 and Fortran, the linker + AMReX library. Because AMReX uses C++17 and Fortran, the linker needs to link the libraries. See `Tutorials/Basic/Build_with_libamrex` for an example of this approach. Note that this approach relies the make system in diff --git a/README.md b/README.md index 72c182470e1..da3a1abcbd2 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@
-AMReX Logo +AMReX Logo

@@ -71,7 +71,7 @@ in a wide variety of other scientific simulations, some of which, can be seen in our application [gallery](https://amrex-codes.github.io/amrex/gallery.html).

## Get Help diff --git a/Src/Amr/AMReX_Amr.cpp b/Src/Amr/AMReX_Amr.cpp index 66ec4664c5a..02f0452eac9 100644 --- a/Src/Amr/AMReX_Amr.cpp +++ b/Src/Amr/AMReX_Amr.cpp @@ -910,7 +910,7 @@ Amr::writeSmallPlotFile () // Don't continue if we have no variables to plot. - if (stateSmallPlotVars().size() == 0) { + if (stateSmallPlotVars().size() == 0 && deriveSmallPlotVars().size() == 0) { return; } diff --git a/Src/Amr/AMReX_AmrLevel.H b/Src/Amr/AMReX_AmrLevel.H index 0aaf7fc2620..5034df1b5e5 100644 --- a/Src/Amr/AMReX_AmrLevel.H +++ b/Src/Amr/AMReX_AmrLevel.H @@ -15,6 +15,8 @@ #include #include #include +#include +#include #ifdef AMREX_USE_EB #include #endif @@ -152,11 +154,10 @@ public: int ncycle) = 0; /** - * \brief Contains operations to be done after a timestep. This is a - * pure virtual function and hence MUST be implemented by derived - * classes. + * \brief Contains operations to be done after a timestep. If this + * function is overridden, don't forget to reset FillPatcher. */ - virtual void post_timestep (int iteration) = 0; + virtual void post_timestep (int iteration); /** * \brief Contains operations to be done only after a full coarse * timestep. The default implementation does nothing. @@ -243,12 +244,14 @@ public: Long countCells () const noexcept; //! Get the area not to tag. - const BoxArray& getAreaNotToTag() noexcept; - const Box& getAreaToTag() noexcept; + const BoxArray& getAreaNotToTag () noexcept; + const Box& getAreaToTag () noexcept; //! Construct the area not to tag. - void constructAreaNotToTag(); + void constructAreaNotToTag (); //! Set the area not to tag. - void setAreaNotToTag(BoxArray& ba) noexcept; + void setAreaNotToTag (BoxArray& ba) noexcept; + + void resetFillPatcher (); /** * \brief Error estimation for regridding. This is a pure virtual @@ -365,6 +368,20 @@ public: virtual void particle_redistribute (int /*lbase*/ = 0, bool /*a_init*/ = false) {;} #endif + /** + * \brief Fill with FillPatcher on level > 0 and AmrLevel::FillPatch on level 0. + * + * \param mf destination MultiFab + * \param dcomp starting component for the destination + * \param ncomp number of component to fill + * \param nghost number of ghost cells to fill + * \param time time + * \param state_index StateData index + * \param scomp starting component in the StateData + */ + void FillPatcherFill (amrex::MultiFab& mf, int dcomp, int ncomp, int nghost, + amrex::Real time, int state_index, int scomp); + static void FillPatch (AmrLevel& amrlevel, MultiFab& leveldata, int boxGrow, @@ -380,8 +397,33 @@ public: Real time, int index, int scomp, - int ncomp, - int dcomp=0); + int ncomp, + int dcomp=0); + + /** + * \brief Evolve one step with Runge-Kutta (2, 3, or 4) + * + * To use RK, the StateData must have all the ghost cells needed. See + * namespace RungeKutta for expected function signatures of the callable + * parameters. + * + * \param order order of RK + * \param state_type index of StateData + * \param time time at the beginning of the step. + * \param dt time step + * \param iteration iteration number on fine level during a coarse time + * step. For an AMR simulation with subcycling and a + * refinement ratio of 2, the number is either 1 or 2, + * denoting the first and second substep, respectively. + * \param ncycle number of subcyling steps. It's usually 2 or 4. + * Without subcycling, this will be 1. + * \param f computing right-hand side for evolving the StateData. + * One can also register data for flux registers in this. 
+ * \param p optionally post-processing RK stage results + */ + template <typename F, typename P = RungeKutta::PostStageNoOp> + void RK (int order, int state_type, Real time, Real dt, int iteration, + int ncycle, F&& f, P&& p = RungeKutta::PostStageNoOp()); #ifdef AMREX_USE_EB static void SetEBMaxGrowCells (int nbasic, int nvolume, int nfull) noexcept { @@ -425,7 +467,7 @@ protected: IntVect fine_ratio; // Refinement ratio to finer level. static DeriveList derive_lst; // List of derived quantities. static DescriptorList desc_lst; // List of state variables. - Vector<StateData> state; // Array of state data. + Vector<StateData> state; // Array of state data. BoxArray m_AreaNotToTag; //Area which shouldn't be tagged on this level. Box m_AreaToTag; //Area which is allowed to be tagged on this level. @@ -436,8 +478,18 @@ protected: std::unique_ptr<FabFactory<FArrayBox> > m_factory; + Vector<std::unique_ptr<FillPatcher<MultiFab>>> m_fillpatcher; + private: + template <std::size_t order> + void storeRKCoarseData (int state_type, Real time, Real dt, + MultiFab const& S_old, + Array<MultiFab,order> const& rkk); + + void FillRKPatch (int state_index, MultiFab& S, Real time, + int stage, int iteration, int ncycle); + mutable BoxArray edge_grids[AMREX_SPACEDIM]; // face-centered grids mutable BoxArray nodal_grids; // all nodal grids }; @@ -558,6 +610,74 @@ private: std::map< int,Vector< Vector< Vector<FillBoxId> > > > m_fbid; // [grid][level][fillablesubbox][oldnew] }; +template <typename F, typename P> +void AmrLevel::RK (int order, int state_type, Real time, Real dt, int iteration, + int ncycle, F&& f, P&& p) +{ + BL_PROFILE("AmrLevel::RK()"); + + AMREX_ASSERT(AmrLevel::desc_lst[state_type].nExtra() > 0); // Need ghost cells in StateData + + MultiFab& S_old = get_old_data(state_type); + MultiFab& S_new = get_new_data(state_type); + const Real t_old = state[state_type].prevTime(); + const Real t_new = state[state_type].curTime(); + AMREX_ALWAYS_ASSERT(amrex::almostEqual(time,t_old) && amrex::almostEqual(time+dt,t_new)); + + if (order == 2) { + RungeKutta::RK2(S_old, S_new, time, dt, std::forward<F>(f), + [&] (int /*stage*/, MultiFab& mf, Real t) { + FillPatcherFill(mf, 0, mf.nComp(), mf.nGrow(), t, + state_type, 0); }, + std::forward<P>
(p)); + } else if (order == 3) { + RungeKutta::RK3(S_old, S_new, time, dt, std::forward<F>(f), + [&] (int stage, MultiFab& mf, Real t) { + FillRKPatch(state_type, mf, t, stage, iteration, ncycle); + }, + [&] (Array<MultiFab,2> const& rkk) { + if (level < parent->finestLevel()) { + storeRKCoarseData(state_type, time, dt, S_old, rkk); + } + }, + std::forward<P>
(p)); + } else if (order == 4) { + RungeKutta::RK4(S_old, S_new, time, dt, std::forward<F>(f), + [&] (int stage, MultiFab& mf, Real t) { + FillRKPatch(state_type, mf, t, stage, iteration, ncycle); + }, + [&] (Array<MultiFab,3> const& rkk) { + if (level < parent->finestLevel()) { + storeRKCoarseData(state_type, time, dt, S_old, rkk); + } + }, + std::forward<P>
(p)); + } else { + amrex::Abort("AmrLevel::RK: order = "+std::to_string(order)+" is not supported"); + } +} + +template <std::size_t order> +void AmrLevel::storeRKCoarseData (int state_type, Real time, Real dt, + MultiFab const& S_old, + Array<MultiFab,order> const& rkk) +{ + if (level == parent->finestLevel()) { return; } + + const StateDescriptor& desc = AmrLevel::desc_lst[state_type]; + + auto& fillpatcher = parent->getLevel(level+1).m_fillpatcher[state_type]; + fillpatcher = std::make_unique<FillPatcher<MultiFab>> + (parent->boxArray(level+1), parent->DistributionMap(level+1), + parent->Geom(level+1), + parent->boxArray(level), parent->DistributionMap(level), + parent->Geom(level), + IntVect(desc.nExtra()), desc.nComp(), desc.interp(0)); + + fillpatcher->storeRKCoarseData(time, dt, S_old, rkk); +} + + } #endif /*_AmrLevel_H_*/
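As a usage illustration for the ``RK`` member defined above, a hedged sketch from inside an ``AmrLevel``-derived class's advance (``State_Type`` and ``compute_dSdt`` are hypothetical names; the right-hand-side callback signature follows the conventions of ``namespace RungeKutta``)::

    // Advance the state by dt with a 2nd-order Runge-Kutta scheme.
    // RK() fills ghost cells for each stage via FillPatcher/FillRKPatch.
    RK(2, State_Type, time, dt, iteration, ncycle,
       [&] (int /*stage*/, amrex::MultiFab& dSdt, amrex::MultiFab const& S,
            amrex::Real t, amrex::Real /*dtsub*/)
       {
           compute_dSdt(S, dSdt, t); // hypothetical RHS: dSdt = f(S, t)
       });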
Must increase blocking factor."); + } + + auto& fillpatcher = m_fillpatcher[state_index]; + if (fillpatcher == nullptr) { + fillpatcher = std::make_unique> + (parent->boxArray(level), parent->DistributionMap(level), geom_fine, + parent->boxArray(level-1), parent->DistributionMap(level-1), geom_crse, + IntVect(nghost), desc.nComp(), desc.interp(scomp)); + } + + fillpatcher->fill(mf, IntVect(nghost), time, + smf_crse, stime_crse, smf_fine, stime_fine, + scomp, dcomp, ncomp, + physbcf_crse, scomp, physbcf_fine, scomp, + desc.getBCs(), scomp); + } +} + void AmrLevel::FillPatch (AmrLevel& amrlevel, MultiFab& leveldata, @@ -2163,4 +2231,23 @@ AmrLevel::CreateLevelDirectory (const std::string &dir) levelDirectoryCreated = true; } +void +AmrLevel::FillRKPatch (int state_index, MultiFab& S, Real time, + int stage, int iteration, int ncycle) +{ + StateDataPhysBCFunct physbcf(state[state_index], 0, geom); + + if (level == 0) { + S.FillBoundary(geom.periodicity()); + physbcf(S, 0, S.nComp(), S.nGrowVect(), time, 0); + } else { + auto& crse_level = parent->getLevel(level-1); + StateDataPhysBCFunct physbcf_crse(crse_level.state[state_index], 0, + crse_level.geom); + auto& fillpatcher = m_fillpatcher[state_index]; + fillpatcher->fillRK(stage, iteration, ncycle, S, time, physbcf_crse, + physbcf, AmrLevel::desc_lst[state_index].getBCs()); + } +} + } diff --git a/Src/Amr/AMReX_Derive.H b/Src/Amr/AMReX_Derive.H index 2a7c2e26713..7d5b32d7aa6 100644 --- a/Src/Amr/AMReX_Derive.H +++ b/Src/Amr/AMReX_Derive.H @@ -84,9 +84,9 @@ extern "C" const int* level, const int* grid_no) ; } -typedef void (*DeriveFuncFab) (const amrex::Box& bx, amrex::FArrayBox& derfab, int dcomp, int ncomp, - const amrex::FArrayBox& datafab, const amrex::Geometry& geomdata, - amrex::Real time, const int* bcrec, int level); + typedef std::function DeriveFuncFab; class DescriptorList; diff --git a/Src/Amr/AMReX_StateDescriptor.cpp b/Src/Amr/AMReX_StateDescriptor.cpp index 932479feeb2..1910dcf7b3f 100644 --- a/Src/Amr/AMReX_StateDescriptor.cpp +++ b/Src/Amr/AMReX_StateDescriptor.cpp @@ -42,23 +42,31 @@ StateDescriptor::BndryFunc::operator () (Real* data,const int* lo,const int* hi, { BL_ASSERT(m_func != 0 || m_func3D != 0); +#ifdef AMREX_USE_OMP bool thread_safe = bf_thread_safety(lo, hi, dom_lo, dom_hi, a_bc, 1); if (thread_safe) { - if (m_func != 0) - m_func(data,AMREX_ARLIM(lo),AMREX_ARLIM(hi),dom_lo,dom_hi,dx,grd_lo,time,a_bc); - else - m_func3D(data,AMREX_ARLIM_3D(lo),AMREX_ARLIM_3D(hi),AMREX_ARLIM_3D(dom_lo),AMREX_ARLIM_3D(dom_hi), - AMREX_ZFILL(dx),AMREX_ZFILL(grd_lo),time,a_bc); - } else { +#endif + { + if (m_func != 0) { + m_func(data,AMREX_ARLIM(lo),AMREX_ARLIM(hi),dom_lo,dom_hi,dx,grd_lo,time,a_bc); + } else { + m_func3D(data,AMREX_ARLIM_3D(lo),AMREX_ARLIM_3D(hi),AMREX_ARLIM_3D(dom_lo),AMREX_ARLIM_3D(dom_hi), + AMREX_ZFILL(dx),AMREX_ZFILL(grd_lo),time,a_bc); + } + } #ifdef AMREX_USE_OMP + } else { #pragma omp critical (bndryfunc) -#endif - if (m_func != 0) - m_func(data,AMREX_ARLIM(lo),AMREX_ARLIM(hi),dom_lo,dom_hi,dx,grd_lo,time,a_bc); - else - m_func3D(data,AMREX_ARLIM_3D(lo),AMREX_ARLIM_3D(hi),AMREX_ARLIM_3D(dom_lo),AMREX_ARLIM_3D(dom_hi), - AMREX_ZFILL(dx),AMREX_ZFILL(grd_lo),time,a_bc); + { + if (m_func != 0) { + m_func(data,AMREX_ARLIM(lo),AMREX_ARLIM(hi),dom_lo,dom_hi,dx,grd_lo,time,a_bc); + } else { + m_func3D(data,AMREX_ARLIM_3D(lo),AMREX_ARLIM_3D(hi),AMREX_ARLIM_3D(dom_lo),AMREX_ARLIM_3D(dom_hi), + AMREX_ZFILL(dx),AMREX_ZFILL(grd_lo),time,a_bc); + } + } } +#endif } void @@ -69,23 +77,32 @@ 
StateDescriptor::BndryFunc::operator () (Real* data,const int* lo,const int* hi, { BL_ASSERT(m_gfunc != 0 || m_gfunc3D != 0); + amrex::ignore_unused(ng); +#ifdef AMREX_USE_OMP bool thread_safe = bf_thread_safety(lo, hi, dom_lo, dom_hi, a_bc, ng); if (thread_safe) { - if (m_gfunc != 0) - m_gfunc(data,AMREX_ARLIM(lo),AMREX_ARLIM(hi),dom_lo,dom_hi,dx,grd_lo,time,a_bc); - else - m_gfunc3D(data,AMREX_ARLIM_3D(lo),AMREX_ARLIM_3D(hi),AMREX_ARLIM_3D(dom_lo),AMREX_ARLIM_3D(dom_hi), - AMREX_ZFILL(dx),AMREX_ZFILL(grd_lo),time,a_bc); - } else { +#endif + { + if (m_gfunc != 0) { + m_gfunc(data,AMREX_ARLIM(lo),AMREX_ARLIM(hi),dom_lo,dom_hi,dx,grd_lo,time,a_bc); + } else { + m_gfunc3D(data,AMREX_ARLIM_3D(lo),AMREX_ARLIM_3D(hi),AMREX_ARLIM_3D(dom_lo),AMREX_ARLIM_3D(dom_hi), + AMREX_ZFILL(dx),AMREX_ZFILL(grd_lo),time,a_bc); + } + } #ifdef AMREX_USE_OMP + } else { #pragma omp critical (bndryfunc) -#endif - if (m_gfunc != 0) - m_gfunc(data,AMREX_ARLIM(lo),AMREX_ARLIM(hi),dom_lo,dom_hi,dx,grd_lo,time,a_bc); - else - m_gfunc3D(data,AMREX_ARLIM_3D(lo),AMREX_ARLIM_3D(hi),AMREX_ARLIM_3D(dom_lo),AMREX_ARLIM_3D(dom_hi), - AMREX_ZFILL(dx),AMREX_ZFILL(grd_lo),time,a_bc); + { + if (m_gfunc != 0) { + m_gfunc(data,AMREX_ARLIM(lo),AMREX_ARLIM(hi),dom_lo,dom_hi,dx,grd_lo,time,a_bc); + } else { + m_gfunc3D(data,AMREX_ARLIM_3D(lo),AMREX_ARLIM_3D(hi),AMREX_ARLIM_3D(dom_lo),AMREX_ARLIM_3D(dom_hi), + AMREX_ZFILL(dx),AMREX_ZFILL(grd_lo),time,a_bc); + } + } } +#endif } void diff --git a/Src/AmrCore/AMReX_ErrorList.H b/Src/AmrCore/AMReX_ErrorList.H index 90f49b02749..1cc8d61fd07 100644 --- a/Src/AmrCore/AMReX_ErrorList.H +++ b/Src/AmrCore/AMReX_ErrorList.H @@ -383,6 +383,7 @@ std::ostream& operator << (std::ostream& os, const ErrorList& elst); Real m_min_time = std::numeric_limits::lowest(); Real m_max_time = std::numeric_limits::max(); int m_volume_weighting = 0; + int m_derefine = 0; RealBox m_realbox; AMRErrorTagInfo& SetMaxLevel (int max_level) noexcept { @@ -405,6 +406,10 @@ std::ostream& operator << (std::ostream& os, const ErrorList& elst); m_volume_weighting = volume_weighting; return *this; } + AMRErrorTagInfo& SetDerefine (int derefine) noexcept { + m_derefine = derefine; + return *this; + } }; class AMRErrorTag @@ -415,6 +420,8 @@ std::ostream& operator << (std::ostream& os, const ErrorList& elst); struct UserFunc { + virtual ~UserFunc () {} + virtual void operator() (const amrex::Box& bx, amrex::Array4 const& dat, amrex::Array4 const& tag, @@ -465,6 +472,8 @@ std::ostream& operator << (std::ostream& os, const ErrorList& elst); const AMRErrorTagInfo& info = AMRErrorTagInfo()) noexcept : m_userfunc(userfunc), m_field(field), m_info(info), m_ngrow(ngrow) {} + virtual ~AMRErrorTag () {} + virtual void operator() (amrex::TagBoxArray& tb, const amrex::MultiFab* mf, char clearval, diff --git a/Src/AmrCore/AMReX_ErrorList.cpp b/Src/AmrCore/AMReX_ErrorList.cpp index 1594ba740a9..6dcb5565227 100644 --- a/Src/AmrCore/AMReX_ErrorList.cpp +++ b/Src/AmrCore/AMReX_ErrorList.cpp @@ -293,80 +293,225 @@ AMRErrorTag::operator() (TagBoxArray& tba, auto threshold = m_value[level]; auto const volume_weighting = m_info.m_volume_weighting; auto geomdata = geom.data(); + auto tag_update = tagval; + if (m_info.m_derefine) { + tag_update = clearval; + } + if (m_test == GRAD) { - ParallelFor(tba, [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k) noexcept +#ifdef AMREX_USE_EB + if (mf->hasEBFabFactory()) { + auto const& ebfact = + dynamic_cast(mf->Factory()); + auto const& flags = ebfact.getMultiEBCellFlagFab().arrays(); + ParallelFor(tba, [=] 
AMREX_GPU_DEVICE (int bi, int i, int j, int k) noexcept + { + auto const& dat = datma[bi]; + auto const& flag = flags[bi]; + + Real ax = 0.; Real ay = 0.; + if (flag(i,j,k).isConnected(1,0,0)) { + ax = amrex::max(ax,amrex::Math::abs(dat(i+1,j,k) - dat(i,j,k))); + } + if (flag(i,j,k).isConnected(-1,0,0)) { + ax = amrex::max(ax,amrex::Math::abs(dat(i,j,k) - dat(i-1,j,k))); + } + if (flag(i,j,k).isConnected(0,1,0)) { + ay = amrex::max(ay,amrex::Math::abs(dat(i,j+1,k) - dat(i,j,k))); + } + if (flag(i,j,k).isConnected(0,-1,0)) { + ay = amrex::max(ay,amrex::Math::abs(dat(i,j,k) - dat(i,j-1,k))); + } +#if AMREX_SPACEDIM > 2 + Real az = 0.; + if (flag(i,j,k).isConnected(0,0,1)) { + az = amrex::max(az,amrex::Math::abs(dat(i,j,k+1) - dat(i,j,k))); + } + if (flag(i,j,k).isConnected(0,0,-1)) { + az = amrex::max(az,amrex::Math::abs(dat(i,j,k) - dat(i,j,k-1))); + } +#endif + if (amrex::max(AMREX_D_DECL(ax,ay,az)) >= threshold) { + tagma[bi](i,j,k) = tag_update; + } + }); + } else +#endif { - auto const& dat = datma[bi]; - auto ax = amrex::Math::abs(dat(i+1,j,k) - dat(i,j,k)); - ax = amrex::max(ax,amrex::Math::abs(dat(i,j,k) - dat(i-1,j,k))); + ParallelFor(tba, [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k) noexcept + { + auto const& dat = datma[bi]; + + Real ax = 0.; + ax = amrex::Math::abs(dat(i+1,j,k) - dat(i,j,k)); + ax = amrex::max(ax,amrex::Math::abs(dat(i,j,k) - dat(i-1,j,k))); #if AMREX_SPACEDIM == 1 - if (ax >= threshold) { tagma[bi](i,j,k) = tagval;} + if (ax >= threshold) { tagma[bi](i,j,k) = tag_update;} #else - auto ay = amrex::Math::abs(dat(i,j+1,k) - dat(i,j,k)); - ay = amrex::max(ay,amrex::Math::abs(dat(i,j,k) - dat(i,j-1,k))); + Real ay = 0.; + ay = amrex::Math::abs(dat(i,j+1,k) - dat(i,j,k)); + ay = amrex::max(ay,amrex::Math::abs(dat(i,j,k) - dat(i,j-1,k))); #if AMREX_SPACEDIM > 2 - auto az = amrex::Math::abs(dat(i,j,k+1) - dat(i,j,k)); - az = amrex::max(az,amrex::Math::abs(dat(i,j,k) - dat(i,j,k-1))); -#endif - if (amrex::max(AMREX_D_DECL(ax,ay,az)) >= threshold) { - tagma[bi](i,j,k) = tagval; - } -#endif - }); + Real az = 0.; + az = amrex::Math::abs(dat(i,j,k+1) - dat(i,j,k)); + az = amrex::max(az,amrex::Math::abs(dat(i,j,k) - dat(i,j,k-1))); +#endif // DIM > 2 + if (amrex::max(AMREX_D_DECL(ax,ay,az)) >= threshold) { + tagma[bi](i,j,k) = tag_update; + } +#endif // DIM > 1 + }); + } } else if (m_test == RELGRAD) { - ParallelFor(tba, [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k) noexcept +#ifdef AMREX_USE_EB + if (mf->hasEBFabFactory()) { + auto const& ebfact = + dynamic_cast(mf->Factory()); + auto const& flags = ebfact.getMultiEBCellFlagFab().arrays(); + ParallelFor(tba, [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k) noexcept + { + auto const& dat = datma[bi]; + auto const& flag = flags[bi]; + + Real ax = 0.; Real ay = 0.; + + if (flag(i,j,k).isConnected(1,0,0)) { + ax = amrex::max(ax,amrex::Math::abs(dat(i+1,j,k) - dat(i,j,k))); + } + if (flag(i,j,k).isConnected(-1,0,0)) { + ax = amrex::max(ax,amrex::Math::abs(dat(i,j,k) - dat(i-1,j,k))); + } + if (flag(i,j,k).isConnected(0,1,0)) { + ay = amrex::max(ay,amrex::Math::abs(dat(i,j+1,k) - dat(i,j,k))); + } + if (flag(i,j,k).isConnected(0,-1,0)) { + ay = amrex::max(ay,amrex::Math::abs(dat(i,j,k) - dat(i,j-1,k))); + } +#if AMREX_SPACEDIM > 2 + Real az = 0.; + if (flag(i,j,k).isConnected(0,0,1)) { + az = amrex::max(az,amrex::Math::abs(dat(i,j,k+1) - dat(i,j,k))); + } + if (flag(i,j,k).isConnected(0,0,-1)) { + az = amrex::max(az,amrex::Math::abs(dat(i,j,k) - dat(i,j,k-1))); + } +#endif // DIM > 2 + if 
(amrex::max(AMREX_D_DECL(ax,ay,az)) + >= threshold * amrex::Math::abs(dat(i,j,k))) { + tagma[bi](i,j,k) = tag_update; + } + }); + } else +#endif { - auto const& dat = datma[bi]; - auto ax = amrex::Math::abs(dat(i+1,j,k) - dat(i,j,k)); - ax = amrex::max(ax,amrex::Math::abs(dat(i,j,k) - dat(i-1,j,k))); + ParallelFor(tba, [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k) noexcept + { + auto const& dat = datma[bi]; + + Real ax = amrex::Math::abs(dat(i+1,j,k) - dat(i,j,k)); + ax = amrex::max(ax,amrex::Math::abs(dat(i,j,k) - dat(i-1,j,k))); #if AMREX_SPACEDIM == 1 - if (ax >= threshold * amrex::Math::abs(dat(i,j,k))) { tagma[bi](i,j,k) = tagval;} + if (ax >= threshold * amrex::Math::abs(dat(i,j,k))) { tagma[bi](i,j,k) = tag_update;} #else - auto ay = amrex::Math::abs(dat(i,j+1,k) - dat(i,j,k)); - ay = amrex::max(ay,amrex::Math::abs(dat(i,j,k) - dat(i,j-1,k))); + Real ay = amrex::Math::abs(dat(i,j+1,k) - dat(i,j,k)); + ay = amrex::max(ay,amrex::Math::abs(dat(i,j,k) - dat(i,j-1,k))); #if AMREX_SPACEDIM > 2 - auto az = amrex::Math::abs(dat(i,j,k+1) - dat(i,j,k)); - az = amrex::max(az,amrex::Math::abs(dat(i,j,k) - dat(i,j,k-1))); -#endif - if (amrex::max(AMREX_D_DECL(ax,ay,az)) - >= threshold * amrex::Math::abs(dat(i,j,k))) { - tagma[bi](i,j,k) = tagval; - } -#endif - }); + Real az = amrex::Math::abs(dat(i,j,k+1) - dat(i,j,k)); + az = amrex::max(az,amrex::Math::abs(dat(i,j,k) - dat(i,j,k-1))); +#endif // DIM > 2 + if (amrex::max(AMREX_D_DECL(ax,ay,az)) + >= threshold * amrex::Math::abs(dat(i,j,k))) { + tagma[bi](i,j,k) = tag_update; + } +#endif // DIM > 1 + }); + } } else if (m_test == LESS) { - ParallelFor(tba, [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k) noexcept +#ifdef AMREX_USE_EB + if (mf->hasEBFabFactory()) { + auto const& ebfact = + dynamic_cast(mf->Factory()); + auto const& flags = ebfact.getMultiEBCellFlagFab().arrays(); + ParallelFor(tba, [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k) noexcept + { + Real vol = volume_weighting ? Geometry::Volume(IntVect{AMREX_D_DECL(i,j,k)}, geomdata) : 1.0_rt; + auto const& flag = flags[bi]; + if (!flag(i,j,k).isCovered()) { + if (datma[bi](i,j,k) * vol <= threshold) { + tagma[bi](i,j,k) = tag_update; + } + } + }); + } else +#endif { + ParallelFor(tba, [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k) noexcept { Real vol = volume_weighting ? Geometry::Volume(IntVect{AMREX_D_DECL(i,j,k)}, geomdata) : 1.0_rt; if (datma[bi](i,j,k) * vol <= threshold) { - tagma[bi](i,j,k) = tagval; + tagma[bi](i,j,k) = tag_update; } }); + } } else if (m_test == GREATER) { +#ifdef AMREX_USE_EB + if (mf->hasEBFabFactory()) { + auto const& ebfact = + dynamic_cast(mf->Factory()); + auto const& flags = ebfact.getMultiEBCellFlagFab().arrays(); + ParallelFor(tba, [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k) noexcept + { + Real vol = volume_weighting ? Geometry::Volume(IntVect{AMREX_D_DECL(i,j,k)}, geomdata) : 1.0_rt; + auto const& flag = flags[bi]; + if (!flag(i,j,k).isCovered()) { + if (datma[bi](i,j,k) * vol >= threshold) { + tagma[bi](i,j,k) = tag_update; + } + } + }); + } else +#endif ParallelFor(tba, [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k) noexcept { Real vol = volume_weighting ? 
Geometry::Volume(IntVect{AMREX_D_DECL(i,j,k)}, geomdata) : 1.0_rt; - if (datma[bi](i,j,k) * vol >= threshold) { - tagma[bi](i,j,k) = tagval; - } + if (datma[bi](i,j,k) * vol >= threshold) { + tagma[bi](i,j,k) = tag_update; + } }); } else if (m_test == VORT) { const Real fac = threshold * Real(std::pow(2,level)); - ParallelFor(tba, [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k) noexcept +#ifdef AMREX_USE_EB + if (mf->hasEBFabFactory()) { + auto const& ebfact = + dynamic_cast(mf->Factory()); + auto const& flags = ebfact.getMultiEBCellFlagFab().arrays(); + ParallelFor(tba, [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k) noexcept + { + auto const& flag = flags[bi]; + if (!flag(i,j,k).isCovered()) { + if (datma[bi](i,j,k) >= fac) { + tagma[bi](i,j,k) = tag_update; + } + } + }); + } else +#endif { - if (datma[bi](i,j,k) >= fac) { - tagma[bi](i,j,k) = tagval; - } - }); + ParallelFor(tba, [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k) noexcept + { + if (datma[bi](i,j,k) >= fac) { + tagma[bi](i,j,k) = tag_update; + } + }); + } } else { diff --git a/Src/AmrCore/AMReX_FillPatchUtil.H b/Src/AmrCore/AMReX_FillPatchUtil.H index 51a5f457391..495cbc180b6 100644 --- a/Src/AmrCore/AMReX_FillPatchUtil.H +++ b/Src/AmrCore/AMReX_FillPatchUtil.H @@ -28,12 +28,17 @@ namespace amrex { - template + template struct NullInterpHook { - void operator() (FAB& /*fab*/, const Box& /*bx*/, int /*icomp*/, int /*ncomp*/) const {} + template ::value,int> = 0> + void operator() (MFFAB& /*fab*/, const Box& /*bx*/, int /*icomp*/, int /*ncomp*/) const {} - void operator() (Array /*fab*/, const Box& /*bx*/, int /*icomp*/, int /*ncomp*/) const {} + template ::value,int> = 0> + void operator() (Array /*fab*/, const Box& /*bx*/, int /*icomp*/, int /*ncomp*/) const {} + + template ::value,int> = 0> + void operator() (MFFAB& /*mf*/, int /*icomp*/, int /*ncomp*/) const {} }; template diff --git a/Src/AmrCore/AMReX_FillPatchUtil_I.H b/Src/AmrCore/AMReX_FillPatchUtil_I.H index 8d8f210a0fe..3e94abfad27 100644 --- a/Src/AmrCore/AMReX_FillPatchUtil_I.H +++ b/Src/AmrCore/AMReX_FillPatchUtil_I.H @@ -4,6 +4,31 @@ namespace amrex { +namespace detail { + +template +auto call_interp_hook (F const& f, MF& mf, int icomp, int ncomp) + -> decltype(f(mf[0],Box(),icomp,ncomp)) +{ +#ifdef AMREX_USE_OMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for (MFIter mfi(mf); mfi.isValid(); ++mfi) { + auto& dfab = mf[mfi]; + const Box& dbx = dfab.box(); + f(dfab, dbx, icomp, ncomp); + } +} + +template +auto call_interp_hook (F const& f, MF& mf, int icomp, int ncomp) + -> decltype(f(mf,icomp,ncomp)) +{ + f(mf, icomp, ncomp); +} + +} + template bool ProperlyNested (const IntVect& ratio, const IntVect& blocking_factor, int ngrow, const IndexType& boxType, Interp* mapper) @@ -459,9 +484,6 @@ namespace { if ( ! fpc.ba_crse_patch.empty()) { - - using FAB = typename MF::FABType::value_type; - MF mf_crse_patch = make_mf_crse_patch (fpc, ncomp, mf.boxArray().ixType()); // Must make sure fine exists under needed coarse faces. // It stores values for the final (interior) interpolation, @@ -491,20 +513,12 @@ namespace { solve_mask.setVal(1); // Values to solve. solve_mask.setVal(0, mask_cpc, 0, 1); // Known values. 
- for (MFIter mfi(mf_refined_patch); mfi.isValid(); ++mfi) - { - FAB& sfab = mf_crse_patch[mfi]; - pre_interp(sfab, sfab.box(), 0, ncomp); - } + detail::call_interp_hook(pre_interp, mf_crse_patch, 0, ncomp); InterpFace(mapper, mf_crse_patch, 0, mf_refined_patch, 0, ncomp, ratio, solve_mask, cgeom, fgeom, bcscomp, RunOn::Gpu, bcs); - for (MFIter mfi(mf_refined_patch); mfi.isValid(); ++mfi) - { - FAB& dfab = mf_refined_patch[mfi]; - post_interp(dfab, dfab.box(), 0, ncomp); - } + detail::call_interp_hook(post_interp, mf_refined_patch, 0, ncomp); bool aliasing = false; for (auto const& fmf_a : fmf) { @@ -538,30 +552,14 @@ MF mf_fine_patch = make_mf_fine_patch(fpc, ncomp); -#ifdef AMREX_USE_OMP -#pragma omp parallel if (Gpu::notInLaunchRegion()) -#endif - for (MFIter mfi(mf_crse_patch); mfi.isValid(); ++mfi) - { - auto& sfab = mf_crse_patch[mfi]; - const Box& sbx = sfab.box(); - pre_interp(sfab, sbx, 0, ncomp); - } + detail::call_interp_hook(pre_interp, mf_crse_patch, 0, ncomp); FillPatchInterp(mf_fine_patch, 0, mf_crse_patch, 0, ncomp, IntVect(0), cgeom, fgeom, amrex::grow(amrex::convert(fgeom.Domain(),mf.ixType()),nghost), ratio, mapper, bcs, bcscomp); -#ifdef AMREX_USE_OMP -#pragma omp parallel if (Gpu::notInLaunchRegion()) -#endif - for (MFIter mfi(mf_fine_patch); mfi.isValid(); ++mfi) - { - auto& dfab = mf_fine_patch[mfi]; - const Box& dbx = dfab.box(); - post_interp(dfab, dbx, 0, ncomp); - } + detail::call_interp_hook(post_interp, mf_fine_patch, 0, ncomp); mf.ParallelCopy(mf_fine_patch, 0, dcomp, ncomp, IntVect{0}, nghost); } @@ -1024,14 +1022,7 @@ InterpFromCoarseLevel (MF& mf, IntVect const& nghost, Real time, cbc(mf_crse_patch, 0, ncomp, mf_crse_patch.nGrowVect(), time, cbccomp); -#ifdef AMREX_USE_OMP -#pragma omp parallel if (Gpu::notInLaunchRegion()) -#endif - for (MFIter mfi(mf_crse_patch); mfi.isValid(); ++mfi) - { - FAB& sfab = mf_crse_patch[mfi]; - pre_interp(sfab, sfab.box(), 0, ncomp); - } + detail::call_interp_hook(pre_interp, mf_crse_patch, 0, ncomp); FillPatchInterp(mf, dcomp, mf_crse_patch, 0, ncomp, nghost, cgeom, fgeom, fdomain_g, ratio, mapper, bcs, bcscomp); diff --git a/Src/AmrCore/AMReX_FillPatcher.H b/Src/AmrCore/AMReX_FillPatcher.H new file mode 100644 index 00000000000..d0e775416ee --- /dev/null +++ b/Src/AmrCore/AMReX_FillPatcher.H @@ -0,0 +1,585 @@ +#ifndef AMREX_FILLPATCHER_H_ +#define AMREX_FILLPATCHER_H_ +#include + +#include + +namespace amrex { + +/** + * \brief FillPatcher is for filling a fine level MultiFab/FabArray. + * + * This class is not as general as the FillPatchTwoLevels functions. It + * fills the fine ghost cells not overlapping any fine level valid cells + * with interpolation of the coarse data. Then it fills the fine ghost + * cells overlapping fine level valid cells with the fine level data. If + * the valid cells of the destination need to be filled, it will be done as + * well. Finally, it will fill the physical boundary using the user + * provided functor. The `fill` member function can be used to do the + * operations just described. Alternatively, one can also use the + * `fillCoarseFineBoundary` to fill the ghost cells at the coarse/fine + * boundary only. Then one can manually call FillBoundary to fill the other + * ghost cells, and use the physical BC functor to handle the physical + * boundary. + * + * The communication of the coarse data needed for spatial interpolation is + * optimized at the cost of being error-prone. One must follow these + * guidelines.
+ * + * (1) This class is for filling data during time stepping, not during + * regrid. The fine level data passed as input must have the same BoxArray + * and DistributionMapping as the destination. It's OK if they are the same + * MultiFab. For AmrLevel based codes, AmrLevel::FillPatcherFill will try to + * use FillPatcher if it can, and AmrLevel::FillPatch will use the fillpatch + * functions. + * + * (2) When to build? It is recommended that one uses `std::unique_ptr` to + * store the FillPatcher object, and build it only when it is needed and + * it's a nullptr. For AmrLevel based codes, the AmrLevel class will build + * it for you as needed when you call the AmrLevel::FillPatcherFill + * function. + * + * (3) When to destroy? Usually, we do time stepping on a coarse level + * first. Then we recursively do time stepping on fine levels. After the + * finer level finishes, we do reflux and average the fine data down to the + * coarse level. After that we should destroy the FillPatcher object + * associated with these two levels, because the coarse data stored in the + * object has become outdated. For AmrCore based codes, you could use + * Tests/Amr/Advection_AmrCore as an example. For AmrLevel based codes, you + * should do this in the post_timestep virtual function (see + * Tests/Amr/Advection_AmrLevel for an example). + * + * (4) The source MultiFabs/FabArrays (i.e., the crse_data and fine_data + * arguments of the fill function) need to have exactly the same number of + * components as the ncomp argument of the constructor, even though it's + * allowed to fill only some of the components with the fill function. + * + * (5) This only works for cell-centered and nodal data. + * + * This class also provides support for RungeKutta::RK3 and RungeKutta::RK4. + * The storeRKCoarseData function can be used to store coarse AMR level + * data that are needed for filling fine level data's ghost cells in this + * class. The `fillRK` function can be used to fill ghost cells for fine + * AMR levels. This operation at the coarse/fine boundary is non-trivial + * for RK orders higher than 2. Note that it is expected that time stepping + * on the coarse level is performed before any fine level time stepping, and + * it's the user's responsibility to properly create and destroy this object. + * See AmrLevel::RK for an example of using the RungeKutta functions and + * FillPatcher together. + */ + +template +class FillPatcher +{ +public: + + /** + * \brief Constructor of FillPatcher + * + * \param fba fine level BoxArray + * \param fdm fine level DistributionMapping + * \param fgeom fine level Geometry + * \param cba coarse level BoxArray + * \param cdm coarse level DistributionMapping + * \param cgeom coarse level Geometry + * \param nghost max number of ghost cells to be filled at coarse/fine boundary + * \param ncomp the number of components + * \param interp for spatial interpolation + * \param eb_index_space optional argument for specifying EB IndexSpace + */ + FillPatcher (BoxArray const& fba, DistributionMapping const& fdm, + Geometry const& fgeom, + BoxArray const& cba, DistributionMapping const& cdm, + Geometry const& cgeom, + IntVect const& nghost, int ncomp, InterpBase* interp, +#ifdef AMREX_USE_EB + EB2::IndexSpace const* eb_index_space = EB2::TopIndexSpaceIfPresent()); +#else + EB2::IndexSpace const* eb_index_space = nullptr); +#endif + + /** + * \brief Function to fill data + * + * \param mf destination MultiFab/FabArray + * \param nghost number of ghost cells to fill.
This must be <= what's + * provided to the constructor + * \param time time associated with the destination + * \param crse_data coarse level data + * \param crse_time time associated with the coarse data + * \param fine_data fine level data + * \param fine_time time associated with the fine data + * \param scomp starting component of the source + * \param dcomp starting component of the destination + * \param ncomp the number of components to fill + * \param cbc for filling coarse level physical BC + * \param cbccomp starting component of the coarse level BC functor + * \param fbc for filling fine level physical BC + * \param fbccomp starting component of the fine level BC functor + * \param bcs BCRec specifying physical boundary types + * \param bcscomp starting component of the BCRec Vector. + * \param pre_interp optional pre-interpolation hook for modifying the coarse data + * \param post_interp optional post-interpolation hook for modifying the fine data + */ + template , + typename PostInterpHook=NullInterpHook > + void fill (MF& mf, IntVect const& nghost, Real time, + Vector const& crse_data, Vector const& crse_time, + Vector const& fine_data, Vector const& fine_time, + int scomp, int dcomp, int ncomp, + BC& cbc, int cbccomp, BC& fbc, int fbccomp, + Vector const& bcs, int bcscomp, + PreInterpHook const& pre_interp = {}, + PostInterpHook const& post_interp = {}); + + /** + * \brief Function to fill data at coarse/fine boundary only + * + * \param mf destination MultiFab/FabArray + * \param nghost number of ghost cells to fill. This must be <= what's + * provided to the constructor + * \param time time associated with the destination + * \param crse_data coarse level data + * \param crse_time time associated with the coarse data + * \param scomp starting component of the source + * \param dcomp starting component of the destination + * \param ncomp the number of components to fill + * \param cbc for filling coarse level physical BC + * \param cbccomp starting component of the coarse level BC functor + * \param bcs BCRec specifying physical boundary types + * \param bcscomp starting component of the BCRec Vector. + * \param pre_interp optional pre-interpolation hook for modifying the coarse data + * \param post_interp optional post-interpolation hook for modifying the fine data + */ + template , + typename PostInterpHook=NullInterpHook > + void fillCoarseFineBoundary (MF& mf, IntVect const& nghost, Real time, + Vector const& crse_data, + Vector const& crse_time, + int scomp, int dcomp, int ncomp, + BC& cbc, int cbccomp, + Vector const& bcs, int bcscomp, + PreInterpHook const& pre_interp = {}, + PostInterpHook const& post_interp = {}); + + /** + * \brief Store coarse AMR level data for RK3 and RK4 + * + * \tparam order RK order. Must be 3 or 4. + * \param time time at the beginning of the step + * \param dt time step + * \param S_old data at time + * \param RK_k right-hand side at RK stages + */ + template + void storeRKCoarseData (Real time, Real dt, MF const& S_old, + Array const& RK_k); + + /** + * \brief Fill ghost cells of fine AMR level for RK3 and RK4 + * + * \param stage RK stage number starting from 1 + * \param iteration iteration number on fine level during a coarse time + * step. For an AMR simulation with subcycling and a + * refinement ratio of 2, the number is either 1 or 2, + * denoting the first and second substep, respectively. + * \param ncycle number of subcycling steps. It's usually 2 or 4. + * Without subcycling, this will be 1.
+ * \param cbc filling physical boundary on coarse level + * \param fbc filling physical boundary on fine level + * \param bcs physical BC types + */ + template + void fillRK (int stage, int iteration, int ncycle, MF& mf, Real time, + BC& cbc, BC& fbc, Vector const& bcs); + +private: + + BoxArray m_fba; + BoxArray m_cba; + DistributionMapping m_fdm; + DistributionMapping m_cdm; + Geometry m_fgeom; + Geometry m_cgeom; + IntVect m_nghost; + int m_ncomp; + InterpBase* m_interp; + EB2::IndexSpace const* m_eb_index_space = nullptr; + MF m_sfine; + IntVect m_ratio; + Vector>> m_cf_crse_data; + std::unique_ptr m_cf_crse_data_tmp; + std::unique_ptr m_cf_fine_data; + Real m_dt_coarse = std::numeric_limits::lowest(); + + FabArrayBase::FPinfo const& getFPinfo (); +}; + +template +FillPatcher::FillPatcher (BoxArray const& fba, DistributionMapping const& fdm, + Geometry const& fgeom, + BoxArray const& cba, DistributionMapping const& cdm, + Geometry const& cgeom, + IntVect const& nghost, int ncomp, InterpBase* interp, + EB2::IndexSpace const* eb_index_space) + : m_fba(fba), + m_cba(cba), + m_fdm(fdm), + m_cdm(cdm), + m_fgeom(fgeom), + m_cgeom(cgeom), + m_nghost(nghost), + m_ncomp(ncomp), + m_interp(interp), + m_eb_index_space(eb_index_space), + m_sfine(fba, fdm, 1, nghost, MFInfo().SetAlloc(false)) +{ + static_assert(IsFabArray::value, + "FillPatcher: MF must be FabArray type"); + AMREX_ALWAYS_ASSERT(m_fba.ixType().cellCentered() || m_fba.ixType().nodeCentered()); + + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { + m_ratio[idim] = m_fgeom.Domain().length(idim) / m_cgeom.Domain().length(idim); + } + AMREX_ASSERT(m_fgeom.Domain() == amrex::refine(m_cgeom.Domain(),m_ratio)); +} + +template +template +void +FillPatcher::fill (MF& mf, IntVect const& nghost, Real time, + Vector const& cmf, Vector const& ct, + Vector const& fmf, Vector const& ft, + int scomp, int dcomp, int ncomp, + BC& cbc, int cbccomp, + BC& fbc, int fbccomp, + Vector const& bcs, int bcscomp, + PreInterpHook const& pre_interp, + PostInterpHook const& post_interp) +{ + BL_PROFILE("FillPatcher::fill()"); + + AMREX_ALWAYS_ASSERT(m_fba == fmf[0]->boxArray() && + m_fdm == fmf[0]->DistributionMap()); + + fillCoarseFineBoundary(mf, nghost, time, cmf, ct, scomp, dcomp, ncomp, + cbc, cbccomp, bcs, bcscomp, pre_interp, post_interp); + + FillPatchSingleLevel(mf, nghost, time, fmf, ft, scomp, dcomp, ncomp, + m_fgeom, fbc, fbccomp); +} + +template +FabArrayBase::FPinfo const& +FillPatcher::getFPinfo () +{ + const InterpolaterBoxCoarsener& coarsener = m_interp->BoxCoarsener(m_ratio); + return FabArrayBase::TheFPinfo(m_sfine, m_sfine, m_nghost, coarsener, + m_fgeom, m_cgeom, m_eb_index_space); +} + +template +template +void +FillPatcher::fillCoarseFineBoundary (MF& mf, IntVect const& nghost, Real time, + Vector const& cmf, + Vector const& ct, + int scomp, int dcomp, int ncomp, + BC& cbc, int cbccomp, + Vector const& bcs, int bcscomp, + PreInterpHook const& pre_interp, + PostInterpHook const& post_interp) +{ + BL_PROFILE("FillPatcher::fillCFB"); + + AMREX_ALWAYS_ASSERT(nghost.allLE(m_nghost) && + m_fba == mf.boxArray() && + m_fdm == mf.DistributionMap() && + m_cba == cmf[0]->boxArray() && + m_cdm == cmf[0]->DistributionMap() && + m_ncomp >= ncomp && + m_ncomp == cmf[0]->nComp()); + + auto const& fpc = getFPinfo(); + + if ( ! 
fpc.ba_crse_patch.empty()) + { + if (m_cf_fine_data == nullptr) { + m_cf_fine_data = std::make_unique + (make_mf_fine_patch(fpc, m_ncomp)); + } + + int ncmfs = cmf.size(); + for (int icmf = 0; icmf < ncmfs; ++icmf) { + Real t = ct[icmf]; + auto it = std::find_if(m_cf_crse_data.begin(), m_cf_crse_data.end(), + [=] (auto const& x) { + return amrex::almostEqual(x.first,t,5); + }); + + if (it == std::end(m_cf_crse_data)) { + MF mf_crse_patch = make_mf_crse_patch(fpc, m_ncomp); + mf_crse_patch.ParallelCopy(*cmf[icmf], m_cgeom.periodicity()); + + std::pair> tmp; + tmp.first = t; + tmp.second = std::make_unique(std::move(mf_crse_patch)); + m_cf_crse_data.push_back(std::move(tmp)); + } + } + + if (m_cf_crse_data_tmp == nullptr) { + m_cf_crse_data_tmp = std::make_unique + (make_mf_crse_patch(fpc, m_ncomp)); + } + + if (m_cf_crse_data.size() > 0 && + amrex::almostEqual(time, m_cf_crse_data[0].first,5)) + { + amrex::Copy(*m_cf_crse_data_tmp, *m_cf_crse_data[0].second, + scomp, 0, ncomp, 0); + } + else if (m_cf_crse_data.size() > 1 && + amrex::almostEqual(time, m_cf_crse_data[1].first,5)) + { + amrex::Copy(*m_cf_crse_data_tmp, *m_cf_crse_data[1].second, + scomp, 0, ncomp, 0); + } + else if (m_cf_crse_data.size() == 2) + { + int const ng_space_interp = 8; // Need to be big enough + Box domain = m_cgeom.growPeriodicDomain(ng_space_interp); + domain.convert(mf.ixType()); + Real t0 = m_cf_crse_data[0].first; + Real t1 = m_cf_crse_data[1].first; + Real alpha = (t1-time)/(t1-t0); + Real beta = (time-t0)/(t1-t0); + AMREX_ASSERT(alpha >= 0._rt && beta >= 0._rt); + auto const& a = m_cf_crse_data_tmp->arrays(); + auto const& a0 = m_cf_crse_data[0].second->const_arrays(); + auto const& a1 = m_cf_crse_data[1].second->const_arrays(); + amrex::ParallelFor(*m_cf_crse_data_tmp, IntVect(0), ncomp, + [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k, int n) noexcept + { + if (domain.contains(i,j,k)) { + a[bi](i,j,k,n) + = alpha*a0[bi](i,j,k,scomp+n) + + beta*a1[bi](i,j,k,scomp+n); + } + }); + Gpu::streamSynchronize(); + } + else + { + amrex::Abort("FillPatcher: High order interpolation in time not supported. 
Or FillPatcher was not properly deleted."); + } + + cbc(*m_cf_crse_data_tmp, 0, ncomp, nghost, time, cbccomp); + + detail::call_interp_hook(pre_interp, *m_cf_crse_data_tmp, 0, ncomp); + + FillPatchInterp(*m_cf_fine_data, scomp, *m_cf_crse_data_tmp, 0, + ncomp, IntVect(0), m_cgeom, m_fgeom, + amrex::grow(amrex::convert(m_fgeom.Domain(), + mf.ixType()),nghost), + m_ratio, m_interp, bcs, bcscomp); + + detail::call_interp_hook(post_interp, *m_cf_fine_data, scomp, ncomp); + + mf.ParallelCopy(*m_cf_fine_data, scomp, dcomp, ncomp, IntVect{0}, nghost); + } +} + +template +template +void FillPatcher::storeRKCoarseData (Real /*time*/, Real dt, MF const& S_old, + Array const& RK_k) +{ + m_dt_coarse = dt; + m_cf_crse_data.resize(order+1); + + auto const& fpc = getFPinfo(); + + for (auto& tmf : m_cf_crse_data) { + tmf.first = std::numeric_limits::lowest(); // because we don't need it + tmf.second = std::make_unique(make_mf_crse_patch(fpc, m_ncomp)); + } + m_cf_crse_data[0].second->ParallelCopy(S_old, m_cgeom.periodicity()); + for (std::size_t i = 0; i < order; ++i) { + m_cf_crse_data[i+1].second->ParallelCopy(RK_k[i], m_cgeom.periodicity()); + } +} + +template +template +void FillPatcher::fillRK (int stage, int iteration, int ncycle, + MF& mf, Real time, BC& cbc, BC& fbc, + Vector const& bcs) +{ + int rk_order = m_cf_crse_data.size()-1; + if (rk_order != 3 && rk_order != 4) { + amrex::Abort("FillPatcher: unsupported RK order "+std::to_string(rk_order)); + return; + } + AMREX_ASSERT(stage > 0 && stage <= rk_order); + + auto const& fpc = getFPinfo(); + if (m_cf_crse_data_tmp == nullptr) { + m_cf_crse_data_tmp = std::make_unique + (make_mf_crse_patch(fpc, m_ncomp)); + } + + auto const& u = m_cf_crse_data_tmp->arrays(); + auto const& u0 = m_cf_crse_data[0].second->const_arrays(); + auto const& k1 = m_cf_crse_data[1].second->const_arrays(); + auto const& k2 = m_cf_crse_data[2].second->const_arrays(); + auto const& k3 = m_cf_crse_data[3].second->const_arrays(); + + Real dtc = m_dt_coarse; + Real r = Real(1) / Real(ncycle); + Real xsi = Real(iteration-1) / Real(ncycle); + + if (rk_order == 3) { + // coefficients for U + Real b1 = xsi - Real(5./6.)*xsi*xsi; + Real b2 = Real(1./6.)*xsi*xsi; + Real b3 = Real(2./3)*xsi*xsi; + // coefficients for Ut + Real c1 = Real(1.)
- Real(5./3.)*xsi; + Real c2 = Real(1./3.)*xsi; + Real c3 = Real(4./3.)*xsi; + // coefficients for Utt + constexpr Real d1 = Real(-5./3.); + constexpr Real d2 = Real(1./3.); + constexpr Real d3 = Real(4./3.); + if (stage == 1) { + amrex::ParallelFor(*m_cf_crse_data_tmp, IntVect(0), m_ncomp, + [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k, int n) noexcept + { + Real kk1 = k1[bi](i,j,k,n); + Real kk2 = k2[bi](i,j,k,n); + Real kk3 = k3[bi](i,j,k,n); + Real uu = b1*kk1 + b2*kk2 + b3*kk3; + u[bi](i,j,k,n) = u0[bi](i,j,k,n) + dtc*uu; + }); + } else if (stage == 2) { + amrex::ParallelFor(*m_cf_crse_data_tmp, IntVect(0), m_ncomp, + [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k, int n) noexcept + { + Real kk1 = k1[bi](i,j,k,n); + Real kk2 = k2[bi](i,j,k,n); + Real kk3 = k3[bi](i,j,k,n); + Real uu = b1*kk1 + b2*kk2 + b3*kk3; + Real ut = c1*kk1 + c2*kk2 + c3*kk3; + u[bi](i,j,k,n) = u0[bi](i,j,k,n) + dtc*(uu + r*ut); + }); + } else if (stage == 3) { + amrex::ParallelFor(*m_cf_crse_data_tmp, IntVect(0), m_ncomp, + [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k, int n) noexcept + { + Real kk1 = k1[bi](i,j,k,n); + Real kk2 = k2[bi](i,j,k,n); + Real kk3 = k3[bi](i,j,k,n); + Real uu = b1*kk1 + b2*kk2 + b3*kk3; + Real ut = c1*kk1 + c2*kk2 + c3*kk3; + Real utt = d1*kk1 + d2*kk2 + d3*kk3; + u[bi](i,j,k,n) = u0[bi](i,j,k,n) + dtc* + (uu + Real(0.5)*r*ut + Real(0.25)*r*r*utt); + }); + } + } else if (rk_order == 4) { + auto const& k4 = m_cf_crse_data[4].second->const_arrays(); + Real xsi2 = xsi*xsi; + Real xsi3 = xsi2*xsi; + // coefficients for U + Real b1 = xsi - Real(1.5)*xsi2 + Real(2./3.)*xsi3; + Real b2 = xsi2 - Real(2./3.)*xsi3; + Real b3 = b2; + Real b4 = Real(-0.5)*xsi2 + Real(2./3.)*xsi3; + // coefficients for Ut + Real c1 = Real(1.) - Real(3.)*xsi + Real(2.)*xsi2; + Real c2 = Real(2.)*xsi - Real(2.)*xsi2; + Real c3 = c2; + Real c4 = -xsi + Real(2.)*xsi2; + // coefficients for Utt + Real d1 = Real(-3.) + Real(4.)*xsi; + Real d2 = Real( 2.) - Real(4.)*xsi; + Real d3 = d2; + Real d4 = Real(-1.) + Real(4.)*xsi; + // coefficients for Uttt + constexpr Real e1 = Real( 4.); + constexpr Real e2 = Real(-4.); + constexpr Real e3 = Real(-4.); + constexpr Real e4 = Real( 4.); + if (stage == 1) { + amrex::ParallelFor(*m_cf_crse_data_tmp, IntVect(0), m_ncomp, + [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k, int n) noexcept + { + Real kk1 = k1[bi](i,j,k,n); + Real kk2 = k2[bi](i,j,k,n); + Real kk3 = k3[bi](i,j,k,n); + Real kk4 = k4[bi](i,j,k,n); + Real uu = b1*kk1 + b2*kk2 + b3*kk3 + b4*kk4; + u[bi](i,j,k,n) = u0[bi](i,j,k,n) + dtc*uu; + }); + } else if (stage == 2) { + amrex::ParallelFor(*m_cf_crse_data_tmp, IntVect(0), m_ncomp, + [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k, int n) noexcept + { + Real kk1 = k1[bi](i,j,k,n); + Real kk2 = k2[bi](i,j,k,n); + Real kk3 = k3[bi](i,j,k,n); + Real kk4 = k4[bi](i,j,k,n); + Real uu = b1*kk1 + b2*kk2 + b3*kk3 + b4*kk4; + Real ut = c1*kk1 + c2*kk2 + c3*kk3 + c4*kk4; + u[bi](i,j,k,n) = u0[bi](i,j,k,n) + dtc*(uu + Real(0.5)*r*ut); + }); + } else if (stage == 3 || stage == 4) { + Real r2 = r*r; + Real r3 = r2*r; + Real at = (stage == 3) ? Real(0.5)*r : r; + Real att = (stage == 3) ? Real(0.25)*r2 : Real(0.5)*r2; + Real attt = (stage == 3) ? Real(0.0625)*r3 : Real(0.125)*r3; + Real akk = (stage == 3) ? Real(-4.) 
: Real(4.); + amrex::ParallelFor(*m_cf_crse_data_tmp, IntVect(0), m_ncomp, + [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k, int n) noexcept + { + Real kk1 = k1[bi](i,j,k,n); + Real kk2 = k2[bi](i,j,k,n); + Real kk3 = k3[bi](i,j,k,n); + Real kk4 = k4[bi](i,j,k,n); + Real uu = b1*kk1 + b2*kk2 + b3*kk3 + b4*kk4; + Real ut = c1*kk1 + c2*kk2 + c3*kk3 + c4*kk4; + Real utt = d1*kk1 + d2*kk2 + d3*kk3 + d4*kk4; + Real uttt = e1*kk1 + e2*kk2 + e3*kk3 + e4*kk4; + u[bi](i,j,k,n) = u0[bi](i,j,k,n) + dtc * + (uu + at*ut + att*utt + attt*(uttt+akk*(kk3-kk2))); + }); + } + } + Gpu::streamSynchronize(); + + cbc(*m_cf_crse_data_tmp, 0, m_ncomp, m_nghost, time, 0); + + if (m_cf_fine_data == nullptr) { + m_cf_fine_data = std::make_unique(make_mf_fine_patch(fpc, m_ncomp)); + } + + FillPatchInterp(*m_cf_fine_data, 0, *m_cf_crse_data_tmp, 0, + m_ncomp, IntVect(0), m_cgeom, m_fgeom, + amrex::grow(amrex::convert(m_fgeom.Domain(), + mf.ixType()),m_nghost), + m_ratio, m_interp, bcs, 0); + + // xxxxx We can optimize away this ParallelCopy by making a special fpinfo. + mf.ParallelCopy(*m_cf_fine_data, 0, 0, m_ncomp, IntVect(0), m_nghost); + + mf.FillBoundary(m_fgeom.periodicity()); + fbc(mf, 0, m_ncomp, m_nghost, time, 0); +} + +} + +#endif diff --git a/Src/AmrCore/AMReX_Interp_C.H b/Src/AmrCore/AMReX_Interp_C.H index e12c4495fde..967d3aaa177 100644 --- a/Src/AmrCore/AMReX_Interp_C.H +++ b/Src/AmrCore/AMReX_Interp_C.H @@ -135,5 +135,53 @@ face_linear_interp_z (int i, int j, int k, int n, amrex::Array4 con } } +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void cell_quartic_interp_x (int i, int j, int k, int n, Array4 const& fine, + Array4 const& crse) noexcept +{ + constexpr Array1D c = {Real(0.01708984), Real(-0.12304688), + Real(0.92285156), Real(0.20507812), + Real(-0.02197266)}; + int ii = amrex::coarsen(i,2); + int s = 2*(i-ii*2) - 1; // if i == ii*2, s = -1; if i == ii*2+1, s = 1; + fine(i,j,k,n) = c(-2*s)*crse(ii-2,j,k,n) + + c( -s)*crse(ii-1,j,k,n) + + c( 0)*crse(ii ,j,k,n) + + c( s)*crse(ii+1,j,k,n) + + c( 2*s)*crse(ii+2,j,k,n); +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void cell_quartic_interp_y (int i, int j, int k, int n, Array4 const& fine, + Array4 const& crse) noexcept +{ + constexpr Array1D c = {Real(0.01708984), Real(-0.12304688), + Real(0.92285156), Real(0.20507812), + Real(-0.02197266)}; + int jj = amrex::coarsen(j,2); + int s = 2*(j-jj*2) - 1; // if j == jj*2, s = -1; if j == jj*2+1, s = 1; + fine(i,j,k,n) = c(-2*s)*crse(i,jj-2,k,n) + + c( -s)*crse(i,jj-1,k,n) + + c( 0)*crse(i,jj ,k,n) + + c( s)*crse(i,jj+1,k,n) + + c( 2*s)*crse(i,jj+2,k,n); +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void cell_quartic_interp_z (int i, int j, int k, int n, Array4 const& fine, + Array4 const& crse) noexcept +{ + constexpr Array1D c = {Real(0.01708984), Real(-0.12304688), + Real(0.92285156), Real(0.20507812), + Real(-0.02197266)}; + int kk = amrex::coarsen(k,2); + int s = 2*(k-kk*2) - 1; // if k == kk*2, s = -1; if k == kk*2+1, s = 1; + fine(i,j,k,n) = c(-2*s)*crse(i,j,kk-2,n) + + c( -s)*crse(i,j,kk-1,n) + + c( 0)*crse(i,j,kk ,n) + + c( s)*crse(i,j,kk+1,n) + + c( 2*s)*crse(i,j,kk+2,n); +} + } #endif diff --git a/Src/AmrCore/AMReX_Interpolater.H b/Src/AmrCore/AMReX_Interpolater.H index 06398b73097..bdb6cf9d46b 100644 --- a/Src/AmrCore/AMReX_Interpolater.H +++ b/Src/AmrCore/AMReX_Interpolater.H @@ -844,6 +844,74 @@ public: }; +/** +* \brief Quartic interpolation on cell centered data. +* +* Quartic interpolation on cell centered data. 
+*/ + +class CellQuartic + : + public Interpolater +{ +public: + + /** + * \brief The constructor. + */ + explicit CellQuartic (); + + /** + * \brief The destructor. + */ + virtual ~CellQuartic () override; + + /** + * \brief Returns coarsened box given fine box and refinement ratio. + * + * \param fine + * \param ratio + */ + virtual Box CoarseBox (const Box& fine, int ratio) override; + + /** + * \brief Returns coarsened box given fine box and refinement ratio. + * + * \param fine + * \param ratio + */ + virtual Box CoarseBox (const Box& fine, const IntVect& ratio) override; + + /** + * \brief Coarse to fine interpolation in space. + * + * \param crse + * \param crse_comp + * \param fine + * \param fine_comp + * \param ncomp + * \param fine_region + * \param ratio + * \param crse_geom + * \param fine_geom + * \param bcr + * \param actual_comp + * \param actual_state + */ + virtual void interp (const FArrayBox& crse, + int crse_comp, + FArrayBox& fine, + int fine_comp, + int ncomp, + const Box& fine_region, + const IntVect& ratio, + const Geometry& crse_geom, + const Geometry& fine_geom, + Vector const& bcr, + int actual_comp, + int actual_state, + RunOn gpu_or_cpu) override; +}; //! CONSTRUCT A GLOBAL OBJECT OF EACH VERSION. extern AMREX_EXPORT PCInterp pc_interp; @@ -856,6 +924,7 @@ extern AMREX_EXPORT CellBilinear cell_bilinear_interp; extern AMREX_EXPORT CellConservativeProtected protected_interp; extern AMREX_EXPORT CellConservativeQuartic quartic_interp; extern AMREX_EXPORT CellQuadratic quadratic_interp; +extern AMREX_EXPORT CellQuartic cell_quartic_interp; } diff --git a/Src/AmrCore/AMReX_Interpolater.cpp b/Src/AmrCore/AMReX_Interpolater.cpp index a78eac89aa0..8042aa2f322 100644 --- a/Src/AmrCore/AMReX_Interpolater.cpp +++ b/Src/AmrCore/AMReX_Interpolater.cpp @@ -18,6 +18,8 @@ namespace amrex { * * CellQuadratic only works in 2D and 3D on cpu and gpu. * + * CellQuartic works in 1D, 2D and 3D on cpu and gpu with ref ratio of 2 + * * CellConservativeQuartic only works with ref ratio of 2 on cpu and gpu. * * FaceDivFree works in 2D and 3D on cpu and gpu. 
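The five coefficients hard-coded in cell_quartic_interp_{x,y,z} above are rounded forms of the exact dyadic weights {35, -252, 1890, 420, -45}/2048: for a refinement ratio of 2, the two fine cell centers sit at -1/4 and +1/4 of a coarse cell width from the coarse cell center, and these are the degree-4 Lagrange weights evaluated at those two points. The following standalone check is not part of the patch (all names in it are illustrative); it just reproduces the table:

```cpp
#include <cstdio>

// Degree-4 Lagrange weight for node xs[j] among five nodes, evaluated at x.
double lagrange_weight (const double* xs, int j, double x)
{
    double w = 1.0;
    for (int m = 0; m < 5; ++m) {
        if (m != j) { w *= (x - xs[m]) / (xs[j] - xs[m]); }
    }
    return w;
}

int main ()
{
    // Coarse cell centers ii-2 ... ii+2, in units of the coarse cell width.
    const double xs[5] = {-2.0, -1.0, 0.0, 1.0, 2.0};
    // With ratio 2, the fine cell centers are at -1/4 (s = -1 in the
    // kernels) and +1/4 (s = +1) relative to the coarse cell center.
    for (double x : {-0.25, 0.25}) {
        std::printf("x = %+5.2f:", x);
        for (int j = 0; j < 5; ++j) {
            std::printf(" % .8f", lagrange_weight(xs, j, x));
        }
        std::printf("\n");
    }
    // The x = +0.25 row prints 0.01708984 -0.12304688 0.92285156
    // 0.20507812 -0.02197266, i.e. c(-2)..c(2) applied to
    // crse(ii-2)..crse(ii+2); the x = -0.25 row is its mirror image.
    return 0;
}
```

Because the weights are applied dimension by dimension through the tmpz/tmpy staging FArrayBoxes, CellQuartic::interp realizes the full tensor-product quartic interpolant, which is also why its CoarseBox grows the coarsened box by two cells in every direction.
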
@@ -37,6 +39,7 @@ CellConservativeProtected protected_interp; CellConservativeQuartic quartic_interp; CellBilinear cell_bilinear_interp; CellQuadratic quadratic_interp; +CellQuartic cell_quartic_interp; NodeBilinear::~NodeBilinear () {} @@ -988,4 +991,94 @@ FaceDivFree::interp_arr (Array const& crse, }); } +CellQuartic::CellQuartic () {} + +CellQuartic::~CellQuartic () {} + +Box +CellQuartic::CoarseBox (const Box& fine, const IntVect& ratio) +{ + Box crse = amrex::coarsen(fine,ratio); + crse.grow(2); + return crse; +} + +Box +CellQuartic::CoarseBox (const Box& fine, int ratio) +{ + Box crse = amrex::coarsen(fine,ratio); + crse.grow(2); + return crse; +} + +void +CellQuartic::interp (const FArrayBox& crse, + int crse_comp, + FArrayBox& fine, + int fine_comp, + int ncomp, + const Box& fine_region, + const IntVect& ratio, + const Geometry& /*crse_geom*/, + const Geometry& /*fine_geom*/, + Vector const& /*bcr*/, + int /* actual_comp */, + int /* actual_state */, + RunOn runon) +{ + BL_PROFILE("CellQuartic::interp()"); + amrex::ignore_unused(ratio); + AMREX_ASSERT(ratio == 2); + + Box target_fine_region = fine_region & fine.box(); + + bool run_on_gpu = (runon == RunOn::Gpu && Gpu::inLaunchRegion()); + amrex::ignore_unused(run_on_gpu); + + Array4 const& crsearr = crse.const_array(crse_comp); + Array4 const& finearr = fine.array(fine_comp); + +#if (AMREX_SPACEDIM == 3) + Box bz = amrex::coarsen(target_fine_region, IntVect(2,2,1)); + bz.grow(IntVect(2,2,0)); + FArrayBox tmpz(bz, ncomp); + Elixir tmpz_eli; + if (run_on_gpu) tmpz_eli = tmpz.elixir(); + Array4 const& tmpzarr = tmpz.array(); + AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon, bz, ncomp, i, j, k, n, + { + cell_quartic_interp_z(i,j,k,n,tmpzarr,crsearr); + }); +#endif + +#if (AMREX_SPACEDIM >= 2) + Box by = amrex::coarsen(target_fine_region, IntVect(AMREX_D_DECL(2,1,1))); + by.grow(IntVect(AMREX_D_DECL(2,0,0))); + FArrayBox tmpy(by, ncomp); + Elixir tmpy_eli; + if (run_on_gpu) tmpy_eli = tmpy.elixir(); + Array4 const& tmpyarr = tmpy.array(); +#if (AMREX_SPACEDIM == 2) + Array4 srcarr = crsearr; +#else + Array4 srcarr = tmpz.const_array(); +#endif + AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon, by, ncomp, i, j, k, n, + { + cell_quartic_interp_y(i,j,k,n,tmpyarr,srcarr); + }); +#endif + +#if (AMREX_SPACEDIM == 1) + Array4 srcarr = crsearr; +#else + srcarr = tmpy.const_array(); +#endif + AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon, target_fine_region, ncomp, + i, j, k, n, + { + cell_quartic_interp_x(i,j,k,n,finearr,srcarr); + }); +} + } diff --git a/Src/AmrCore/AMReX_MFInterp_1D_C.H b/Src/AmrCore/AMReX_MFInterp_1D_C.H index 37751acc3b9..8fcadec5794 100644 --- a/Src/AmrCore/AMReX_MFInterp_1D_C.H +++ b/Src/AmrCore/AMReX_MFInterp_1D_C.H @@ -149,9 +149,10 @@ void mf_cell_cons_lin_interp_sph (int i, int ns, Array4 const& fine, int f + xoff * slope(ic,0,0,ns); } +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void mf_cell_bilin_interp (int i, int, int, int n, Array4 const& fine, int fcomp, - Array4 const& crse, int ccomp, IntVect const& ratio) noexcept +void mf_cell_bilin_interp (int i, int, int, int n, Array4 const& fine, int fcomp, + Array4 const& crse, int ccomp, IntVect const& ratio) noexcept { int ic = amrex::coarsen(i,ratio[0]); int ioff = i - ic*ratio[0]; diff --git a/Src/AmrCore/AMReX_MFInterp_2D_C.H b/Src/AmrCore/AMReX_MFInterp_2D_C.H index c505ef2655c..e02084e2e8e 100644 --- a/Src/AmrCore/AMReX_MFInterp_2D_C.H +++ b/Src/AmrCore/AMReX_MFInterp_2D_C.H @@ -189,9 +189,10 @@ void mf_cell_cons_lin_interp_rz (int i, int j, int ns, Array4 
const& fine, + yoff * slope(ic,jc,0,ns+ncomp); } +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void mf_cell_bilin_interp (int i, int j, int, int n, Array4 const& fine, int fcomp, - Array4 const& crse, int ccomp, IntVect const& ratio) noexcept +void mf_cell_bilin_interp (int i, int j, int, int n, Array4 const& fine, int fcomp, + Array4 const& crse, int ccomp, IntVect const& ratio) noexcept { int ic = amrex::coarsen(i,ratio[0]); int jc = amrex::coarsen(j,ratio[1]); diff --git a/Src/AmrCore/AMReX_MFInterp_3D_C.H b/Src/AmrCore/AMReX_MFInterp_3D_C.H index dc0da5dba40..17d14ff689b 100644 --- a/Src/AmrCore/AMReX_MFInterp_3D_C.H +++ b/Src/AmrCore/AMReX_MFInterp_3D_C.H @@ -128,9 +128,10 @@ void mf_cell_cons_lin_interp (int i, int j, int k, int ns, Array4 const& f + zoff * slope(ic,jc,kc,ns+ncomp*2); } +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void mf_cell_bilin_interp (int i, int j, int k, int n, Array4 const& fine, int fcomp, - Array4 const& crse, int ccomp, IntVect const& ratio) noexcept +void mf_cell_bilin_interp (int i, int j, int k, int n, Array4 const& fine, int fcomp, + Array4 const& crse, int ccomp, IntVect const& ratio) noexcept { int ic = amrex::coarsen(i,ratio[0]); int jc = amrex::coarsen(j,ratio[1]); diff --git a/Src/AmrCore/AMReX_TagBox.cpp b/Src/AmrCore/AMReX_TagBox.cpp index 6a989ffbbf1..3ec7425e283 100644 --- a/Src/AmrCore/AMReX_TagBox.cpp +++ b/Src/AmrCore/AMReX_TagBox.cpp @@ -441,7 +441,7 @@ TagBoxArray::local_collate_gpu (Gpu::PinnedVector& v) const std::partial_sum(nblocks.begin(), nblocks.end(), blockoffset.begin()+1); int ntotblocks = blockoffset.back(); - PODVector > dv_ntags(ntotblocks); + Gpu::NonManagedDeviceVector dv_ntags(ntotblocks); for (MFIter fai(*this); fai.isValid(); ++fai) { @@ -491,21 +491,21 @@ TagBoxArray::local_collate_gpu (Gpu::PinnedVector& v) const #endif } - PODVector > hv_ntags(ntotblocks); + Gpu::PinnedVector hv_ntags(ntotblocks); Gpu::dtoh_memcpy(hv_ntags.data(), dv_ntags.data(), ntotblocks*sizeof(int)); - PODVector > hv_tags_offset(ntotblocks+1); + Gpu::PinnedVector hv_tags_offset(ntotblocks+1); hv_tags_offset[0] = 0; std::partial_sum(hv_ntags.begin(), hv_ntags.end(), hv_tags_offset.begin()+1); int ntotaltags = hv_tags_offset.back(); if (ntotaltags == 0) return; - PODVector > dv_tags_offset(ntotblocks); + Gpu::NonManagedDeviceVector dv_tags_offset(ntotblocks); int* dp_tags_offset = dv_tags_offset.data(); Gpu::htod_memcpy_async(dp_tags_offset, hv_tags_offset.data(), ntotblocks*sizeof(int)); - PODVector > dv_tags(ntotaltags); + Gpu::NonManagedDeviceVector dv_tags(ntotaltags); IntVect* dp_tags = dv_tags.data(); int iblock = 0; @@ -649,7 +649,24 @@ TagBoxArray::collate (Gpu::PinnedVector& TheGlobalCollateSpace) const // const IntVect* psend = (count > 0) ? TheLocalCollateSpace.data() : nullptr; IntVect* precv = TheGlobalCollateSpace.data(); + + // Issues have been observed with the following call at very large scale when using + // FujitsuMPI. The issue seems to be related to the use of MPI_Datatype. We can + // bypass the issue by exchanging simpler integer arrays.
+#if !(defined(__FUJITSU) || defined(__CLANG_FUJITSU)) ParallelDescriptor::Gatherv(psend, count, precv, countvec, offset, IOProcNumber); +#else + const int* psend_int = psend->begin(); + int* precv_int = precv->begin(); + Long count_int = count * AMREX_SPACEDIM; + auto countvec_int = std::vector(countvec.size()); + auto offset_int = std::vector(offset.size()); + const auto mul_funct = [](const auto el){return el*AMREX_SPACEDIM;}; + std::transform(countvec.begin(), countvec.end(), countvec_int.begin(), mul_funct); + std::transform(offset.begin(), offset.end(), offset_int.begin(), mul_funct); + ParallelDescriptor::Gatherv( + psend_int, count_int, precv_int, countvec_int, offset_int, IOProcNumber); +#endif #else TheGlobalCollateSpace = std::move(TheLocalCollateSpace); diff --git a/Src/AmrCore/CMakeLists.txt b/Src/AmrCore/CMakeLists.txt index f9ff24f243b..be7c87eee4f 100644 --- a/Src/AmrCore/CMakeLists.txt +++ b/Src/AmrCore/CMakeLists.txt @@ -12,6 +12,7 @@ target_sources(amrex AMReX_FluxRegister.cpp AMReX_FillPatchUtil.H AMReX_FillPatchUtil_I.H + AMReX_FillPatcher.H AMReX_FluxRegister.H AMReX_InterpBase.H AMReX_InterpBase.cpp diff --git a/Src/AmrCore/Make.package b/Src/AmrCore/Make.package index 5b3afa61ccb..df3c2e83d40 100644 --- a/Src/AmrCore/Make.package +++ b/Src/AmrCore/Make.package @@ -6,6 +6,8 @@ CEXE_sources += AMReX_AmrCore.cpp AMReX_Cluster.cpp AMReX_ErrorList.cpp AMReX_Fi AMReX_Interpolater.cpp AMReX_MFInterpolater.cpp AMReX_TagBox.cpp AMReX_AmrMesh.cpp \ AMReX_InterpBase.cpp +CEXE_headers += AMReX_FillPatcher.H + CEXE_headers += AMReX_Interp_C.H AMReX_Interp_$(DIM)D_C.H CEXE_headers += AMReX_MFInterp_C.H AMReX_MFInterp_$(DIM)D_C.H diff --git a/Src/Base/AMReX.H b/Src/Base/AMReX.H index e02280f3e3b..91f8fc43b7c 100644 --- a/Src/Base/AMReX.H +++ b/Src/Base/AMReX.H @@ -271,7 +271,7 @@ namespace amrex private: - static std::vector > m_instance; + static AMREX_EXPORT std::vector > m_instance; Geometry* m_geom = nullptr; }; diff --git a/Src/Base/AMReX.cpp b/Src/Base/AMReX.cpp index f06806babcd..76488bf81e0 100644 --- a/Src/Base/AMReX.cpp +++ b/Src/Base/AMReX.cpp @@ -123,6 +123,11 @@ namespace { #ifdef AMREX_USE_HYPRE namespace { int init_hypre = 1; +#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) + int hypre_spgemm_use_vendor = 0; + int hypre_spmv_use_vendor = 0; + int hypre_sptrans_use_vendor = 0; +#endif } #endif @@ -489,6 +494,11 @@ amrex::Initialize (int& argc, char**& argv, bool build_parm_parse, #ifdef AMREX_USE_HYPRE pp.queryAdd("init_hypre", init_hypre); +#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) + pp.queryAdd("hypre_spgemm_use_vendor", hypre_spgemm_use_vendor); + pp.queryAdd("hypre_spmv_use_vendor", hypre_spmv_use_vendor); + pp.queryAdd("hypre_sptrans_use_vendor", hypre_sptrans_use_vendor); +#endif #endif } @@ -526,7 +536,7 @@ amrex::Initialize (int& argc, char**& argv, bool build_parm_parse, #ifdef AMREX_USE_HYPRE if (init_hypre) { HYPRE_Init(); -#ifdef HYPRE_USING_CUDA +#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) #if defined(HYPRE_RELEASE_NUMBER) && (HYPRE_RELEASE_NUMBER >= 22400) @@ -541,9 +551,13 @@ amrex::Initialize (int& argc, char**& argv, bool build_parm_parse, HYPRE_SetGPUMemoryPoolSize( mempool_bin_growth, mempool_min_bin, mempool_max_bin, mempool_max_cached_bytes ); #endif - /* This API below used to be HYPRE_SetSpGemmUseCusparse(). 
This was changed in commit - Hypre master commit dfdd1cd12f */ - HYPRE_SetSpGemmUseVendor(false); +#if (HYPRE_RELEASE_NUMBER >= 22500) + HYPRE_SetSpGemmUseVendor(hypre_spgemm_use_vendor); + HYPRE_SetSpMVUseVendor(hypre_spmv_use_vendor); + HYPRE_SetSpTransUseVendor(hypre_sptrans_use_vendor); +#elif (HYPRE_USING_CUDA) + HYPRE_SetSpGemmUseCusparse(hypre_spgemm_use_vendor); +#endif HYPRE_SetMemoryLocation(HYPRE_MEMORY_DEVICE); HYPRE_SetExecutionPolicy(HYPRE_EXEC_DEVICE); HYPRE_SetUseGpuRand(true); diff --git a/Src/Base/AMReX_Algorithm.H b/Src/Base/AMReX_Algorithm.H index b5a5f4973c7..65a5f8cb763 100644 --- a/Src/Base/AMReX_Algorithm.H +++ b/Src/Base/AMReX_Algorithm.H @@ -145,7 +145,7 @@ namespace amrex AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE I bisect (T const* d, I lo, I hi, T const& v) { while (lo <= hi) { - int mid = (lo+hi)/2; + int mid = lo + (hi-lo)/2; if (v >= d[mid] && v < d[mid+1]) { return mid; } else if (v < d[mid]) { @@ -157,6 +157,57 @@ namespace amrex return hi; } + template + AMREX_GPU_HOST_DEVICE + ItType upper_bound (ItType first, ItType last, const ValType& val) + { +#if AMREX_DEVICE_COMPILE + std::ptrdiff_t count = last-first; + while(count>0){ + auto it = first; + const auto step = count/2; + it += step; + if (!(val < *it)){ + first = ++it; + count -= step + 1; + } + else{ + count = step; + } + } + + return first; +#else + return std::upper_bound(first, last, val); +#endif + } + + template + AMREX_GPU_HOST_DEVICE + ItType lower_bound (ItType first, ItType last, const ValType& val) + { +#ifdef AMREX_DEVICE_COMPILE + std::ptrdiff_t count = last-first; + while(count>0) + { + auto it = first; + const auto step = count/2; + it += step; + if (*it < val){ + first = ++it; + count -= step + 1; + } + else{ + count = step; + } + } + + return first; +#else + return std::lower_bound(first, last, val); +#endif + } + namespace detail { struct clzll_tag {}; diff --git a/Src/Base/AMReX_Any.H b/Src/Base/AMReX_Any.H index b57aa9a39ef..2c7d9688d36 100644 --- a/Src/Base/AMReX_Any.H +++ b/Src/Base/AMReX_Any.H @@ -48,11 +48,25 @@ public: //! Returns a reference to the contained object. template - MF& get () { return dynamic_cast&>(*m_ptr).m_mf; } + MF& get () { + if (auto p0 = dynamic_cast*>(m_ptr.get())) { + return p0->m_mf; + } else { + return dynamic_cast&>(*m_ptr).m_mf; + } + } //! Returns a const reference to the contained object. 
template - MF const& get () const { return dynamic_cast const&>(*m_ptr).m_mf; } + MF const& get () const { + if (auto p0 = dynamic_cast*>(m_ptr.get())) { + return p0->m_mf; + } else if (auto p1 = dynamic_cast*>(m_ptr.get())) { + return p1->m_mf; + } else { + return dynamic_cast const&>(*m_ptr).m_mf; + } + } template bool is () const { return m_ptr->Type() == typeid(MF); } @@ -60,15 +74,18 @@ public: private: struct innards_base { virtual const std::type_info& Type () const = 0; + virtual ~innards_base () = default; }; template struct innards : innards_base { - innards(MF && mf) + innards (MF && mf) : m_mf(std::forward(mf)) {} + virtual ~innards () = default; + virtual const std::type_info& Type () const override { return typeid(MF); } diff --git a/Src/Base/AMReX_Arena.cpp b/Src/Base/AMReX_Arena.cpp index c14fced3872..f7a46dc25c8 100644 --- a/Src/Base/AMReX_Arena.cpp +++ b/Src/Base/AMReX_Arena.cpp @@ -14,11 +14,11 @@ ///#include //#define AMREX_MLOCK(x,y) VirtualLock(x,y) //#define AMREX_MUNLOCK(x,y) VirtualUnlock(x,y) -#define AMREX_MLOCK(x,y) ((void)0) +//#define AMREX_MLOCK(x,y) ((void)0) #define AMREX_MUNLOCK(x,y) ((void)0) #else #include -#define AMREX_MLOCK(x,y) mlock(x,y) +//#define AMREX_MLOCK(x,y) mlock(x,y) #define AMREX_MUNLOCK(x,y) munlock(x,y) #endif @@ -132,19 +132,21 @@ Arena::allocate_system (std::size_t nbytes) if (arena_info.use_cpu_memory) { p = std::malloc(nbytes); +#ifndef _WIN32 #if defined(__GNUC__) && !defined(__clang__) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wmaybe-uninitialized" #endif - if (p && arena_info.device_use_hostalloc) AMREX_MLOCK(p, nbytes); + if (p && (nbytes > 0) && arena_info.device_use_hostalloc) mlock(p, nbytes); #if defined(__GNUC__) && !defined(__clang__) #pragma GCC diagnostic pop +#endif #endif } else if (arena_info.device_use_hostalloc) { AMREX_HIP_OR_CUDA_OR_DPCPP( - AMREX_HIP_SAFE_CALL (hipHostMalloc(&p, nbytes, hipHostMallocMapped));, + AMREX_HIP_SAFE_CALL (hipHostMalloc(&p, nbytes, hipHostMallocMapped|hipHostMallocNonCoherent));, AMREX_CUDA_SAFE_CALL(cudaHostAlloc(&p, nbytes, cudaHostAllocMapped));, p = sycl::malloc_host(nbytes, Gpu::Device::syclContext())); } @@ -190,7 +192,16 @@ Arena::allocate_system (std::size_t nbytes) } #else p = std::malloc(nbytes); - if (p && arena_info.device_use_hostalloc) AMREX_MLOCK(p, nbytes); +#ifndef _WIN32 +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" +#endif + if (p && (nbytes > 0) && arena_info.device_use_hostalloc) mlock(p, nbytes); +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic pop +#endif +#endif #endif if (p == nullptr) amrex::Abort("Sorry, malloc failed"); return p; @@ -253,12 +264,13 @@ Arena::Initialize () if (initialized) return; initialized = true; - BL_ASSERT(the_arena == nullptr); + // see reason on allowed reuse of the default CPU BArena in Arena::Finalize + BL_ASSERT(the_arena == nullptr || the_arena == The_BArena()); BL_ASSERT(the_async_arena == nullptr); - BL_ASSERT(the_device_arena == nullptr); - BL_ASSERT(the_managed_arena == nullptr); + BL_ASSERT(the_device_arena == nullptr || the_device_arena == The_BArena()); + BL_ASSERT(the_managed_arena == nullptr || the_managed_arena == The_BArena()); BL_ASSERT(the_pinned_arena == nullptr); - BL_ASSERT(the_cpu_arena == nullptr); + BL_ASSERT(the_cpu_arena == nullptr || the_cpu_arena == The_BArena()); #ifdef AMREX_USE_GPU #ifdef AMREX_USE_DPCPP @@ -304,7 +316,7 @@ Arena::Initialize () the_async_arena = new 
PArena(the_async_arena_release_threshold); #ifdef AMREX_USE_GPU - if (the_arena->isDevice() || the_arena->isManaged()) { + if (the_arena->isDevice()) { the_device_arena = the_arena; } else { the_device_arena = new CArena(0, ArenaInfo{}.SetDeviceMemory().SetReleaseThreshold @@ -468,6 +480,13 @@ Arena::Finalize () initialized = false; + // we reset Arenas unless they are the default "CPU malloc/free" BArena + // this is because we want to allow users to free their UB objects + // that they forgot to destruct after amrex::Finalize(): + // amrex::Initialize(...); + // MultiFab mf(...); // this should be scoped in { ... } + // amrex::Finalize(); + // mf cannot be used now, but it can at least be freed without a segfault if (!dynamic_cast(the_device_arena)) { if (the_device_arena != the_arena) { delete the_device_arena; diff --git a/Src/Base/AMReX_Array4.H b/Src/Base/AMReX_Array4.H index 0fc4c049437..296762614d3 100644 --- a/Src/Base/AMReX_Array4.H +++ b/Src/Base/AMReX_Array4.H @@ -11,6 +11,50 @@ namespace amrex { + template + struct CellData // Data in a single cell + { + T* AMREX_RESTRICT p = nullptr; + Long stride = 0; + int ncomp = 0; + + AMREX_GPU_HOST_DEVICE + constexpr CellData (T* a_p, Long a_stride, int a_ncomp) + : p(a_p), stride(a_stride), ncomp(a_ncomp) + {} + + template ::value,int> = 0> + AMREX_GPU_HOST_DEVICE + constexpr CellData (CellData::type> const& rhs) noexcept + : p(rhs.p), stride(rhs.stride), ncomp(rhs.ncomp) + {} + + AMREX_GPU_HOST_DEVICE + explicit operator bool() const noexcept { return p != nullptr; } + + AMREX_GPU_HOST_DEVICE + int nComp() const noexcept { return ncomp; } + + template ::value,int> = 0> + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + U& operator[] (int n) const noexcept { +#if defined(AMREX_DEBUG) || defined(AMREX_BOUND_CHECK) + if (n < 0 || n >= ncomp) { +#if AMREX_DEVICE_COMPILE + AMREX_DEVICE_PRINTF(" %d is out of bound (0:%d)", n, ncomp-1); +#else + std::stringstream ss; + ss << " " << n << " is out of bound: (0:" << ncomp-1 << ")"; + amrex::Abort(ss.str()); +#endif + } +#endif + return p[n*stride]; + } + }; + template struct Array4 { @@ -207,6 +251,11 @@ namespace amrex { } } #endif + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + CellData cellData (int i, int j, int k) const noexcept { + return CellData{this->ptr(i,j,k), nstride, ncomp}; + } }; template diff --git a/Src/Base/AMReX_BCRec.H b/Src/Base/AMReX_BCRec.H index 1980c727e81..d76760df9d9 100644 --- a/Src/Base/AMReX_BCRec.H +++ b/Src/Base/AMReX_BCRec.H @@ -74,6 +74,17 @@ public: AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void setHi (int dir, int bc_val) noexcept { bc[AMREX_SPACEDIM+dir] = bc_val; } /** + * \brief Explicitly set bndry value for given face. + */ + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + void set (Orientation face, int bc_val) noexcept { + if (face.isLow()) { + setLo(face.coordDir(), bc_val); + } else { + setHi(face.coordDir(), bc_val); + } + } + /** * \brief Return bndry values (used in calls to FORTRAN). 
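The `BCRec::set` added above folds the low/high-face branch into a single call. A stand-alone model of that dispatch, assuming the usual amrex::Orientation encoding (low faces 0..SPACEDIM-1, high faces SPACEDIM..2*SPACEDIM-1); `BCRecSketch` is an illustrative stand-in, not the real class:

    #include <cassert>

    struct BCRecSketch {
        static constexpr int SPACEDIM = 3;
        int bc[2*SPACEDIM] = {};
        void setLo (int dir, int v) { bc[dir] = v; }
        void setHi (int dir, int v) { bc[SPACEDIM + dir] = v; }
        // face plays the role of Orientation: isLow() <=> face < SPACEDIM,
        // coordDir() <=> face % SPACEDIM
        void set (int face, int v) {
            const int dir = face % SPACEDIM;
            if (face < SPACEDIM) { setLo(dir, v); } else { setHi(dir, v); }
        }
    };

    int main ()
    {
        BCRecSketch b;
        b.set(1, 3);   // low face of direction 1
        b.set(5, 4);   // high face of direction 2
        assert(b.bc[1] == 3 && b.bc[5] == 4);
    }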
*/ AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE diff --git a/Src/Base/AMReX_BC_TYPES.H b/Src/Base/AMReX_BC_TYPES.H index ea24a64addf..b735da6fddb 100644 --- a/Src/Base/AMReX_BC_TYPES.H +++ b/Src/Base/AMReX_BC_TYPES.H @@ -73,7 +73,10 @@ enum mathematicalBndryTypes : int { foextrap = 2, ext_dir = 3, hoextrap = 4, - hoextrapcc = 5 + hoextrapcc = 5, + user_1 = 1001, + user_2 = 1002, + user_3 = 1003 }; } @@ -102,4 +105,3 @@ enum mathematicalBndryTypes : int { #endif #endif - diff --git a/Src/Base/AMReX_BLBackTrace.cpp b/Src/Base/AMReX_BLBackTrace.cpp index 477e0b6bac2..0c304d30011 100644 --- a/Src/Base/AMReX_BLBackTrace.cpp +++ b/Src/Base/AMReX_BLBackTrace.cpp @@ -5,6 +5,9 @@ #include #include #include +#ifdef AMREX_USE_MPI +#include +#endif #ifdef AMREX_TINY_PROFILING #include @@ -71,7 +74,15 @@ BLBackTrace::handler(int s) std::string errfilename; { std::ostringstream ss; - ss << "Backtrace." << ParallelDescriptor::MyProc(); +#ifdef AMREX_USE_MPI + if (MPMD::Initialized()) { + ss << "Backtrace.prog" << MPMD::MyProgId() << "."; + } else +#endif + { + ss << "Backtrace."; + } + ss << ParallelDescriptor::MyProc(); #ifdef AMREX_USE_OMP ss << "." << omp_get_thread_num(); #endif diff --git a/Src/Base/AMReX_BaseFab.H b/Src/Base/AMReX_BaseFab.H index 3a9f5eea018..f0e50ecac48 100644 --- a/Src/Base/AMReX_BaseFab.H +++ b/Src/Base/AMReX_BaseFab.H @@ -260,7 +260,7 @@ public: */ void clear () noexcept; - // Release ownership of memory + //! Release ownership of memory std::unique_ptr release () noexcept; //! Returns how many bytes used @@ -350,10 +350,22 @@ public: * order, with the component index coming last. In other words, * dataPtr returns a pointer to all the Nth components. */ - T* dataPtr (int n = 0) noexcept { AMREX_ASSERT(!(this->dptr == 0)); return &(this->dptr[n*this->domain.numPts()]); } + T* dataPtr (int n = 0) noexcept { + if (this->dptr) { + return &(this->dptr[n*this->domain.numPts()]); + } else { + return nullptr; + } + } //! Same as above except works on const FABs. 
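Stepping back to the AMReX_Array4.H hunk above: `CellData` is a view of the `ncomp` component values of a single cell, which sit `stride` elements apart in the underlying structure-of-arrays layout. A simplified stand-in (no bounds checking, fixed `double` type; `CellDataSketch` is illustrative, not the real class):

    #include <cassert>

    struct CellDataSketch {
        double* p; long stride; int ncomp;
        double& operator[] (int n) const { return p[n*stride]; }
    };

    int main ()
    {
        // 4 cells x 3 components; component n of cell i lives at data[n*4 + i],
        // so the distance between one cell's components is the stride 4.
        double data[12] = {};
        CellDataSketch c{&data[1], 4, 3};  // all components of cell i = 1
        c[2] = 7.0;                        // write component 2 of that cell
        assert(data[2*4 + 1] == 7.0);
    }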
- const T* dataPtr (int n = 0) const noexcept { AMREX_ASSERT(!(this->dptr == 0)); return &(this->dptr[n*this->domain.numPts()]); } + const T* dataPtr (int n = 0) const noexcept { + if (this->dptr) { + return &(this->dptr[n*this->domain.numPts()]); + } else { + return nullptr; + } + } T* dataPtr (const IntVect& iv, int n = 0) noexcept; @@ -1882,9 +1894,9 @@ BaseFab::define () { AMREX_ASSERT(this->dptr == 0); AMREX_ASSERT(this->domain.numPts() > 0); - AMREX_ASSERT(std::numeric_limits::max()/this->nvar > this->domain.numPts()); AMREX_ASSERT(this->nvar >= 0); if (this->nvar == 0) return; + AMREX_ASSERT(std::numeric_limits::max()/this->nvar > this->domain.numPts()); this->truesize = this->nvar*this->domain.numPts(); this->ptr_owner = true; diff --git a/Src/Base/AMReX_Box.cpp b/Src/Base/AMReX_Box.cpp index f93818e784d..e61942c2a48 100644 --- a/Src/Base/AMReX_Box.cpp +++ b/Src/Base/AMReX_Box.cpp @@ -126,7 +126,7 @@ AllGatherBoxes (Vector& bxs, int n_extra_reserve) if (count_tot == 0) return; if (count_tot > static_cast(std::numeric_limits::max())) { - amrex::Abort("AllGatherBoxes: not many boxes"); + amrex::Abort("AllGatherBoxes: too many boxes"); } Vector recv_buffer; @@ -161,7 +161,7 @@ AllGatherBoxes (Vector& bxs, int n_extra_reserve) if (count_tot == 0) return; if (count_tot > static_cast(std::numeric_limits::max())) { - amrex::Abort("AllGatherBoxes: not many boxes"); + amrex::Abort("AllGatherBoxes: too many boxes"); } Vector recv_buffer; diff --git a/Src/Base/AMReX_BoxList.H b/Src/Base/AMReX_BoxList.H index 04e93eab97e..1dc8f15c536 100644 --- a/Src/Base/AMReX_BoxList.H +++ b/Src/Base/AMReX_BoxList.H @@ -206,9 +206,9 @@ public: BoxList& convert (IndexType typ) noexcept; //! Returns a reference to the Vector. - Vector& data() noexcept { return m_lbox; } + Vector& data () noexcept { return m_lbox; } //! Returns a constant reference to the Vector. - const Vector& data() const noexcept { return m_lbox; } + const Vector& data () const noexcept { return m_lbox; } void swap (BoxList& rhs) { std::swap(m_lbox, rhs.m_lbox); diff --git a/Src/Base/AMReX_CTOParallelForImpl.H b/Src/Base/AMReX_CTOParallelForImpl.H new file mode 100644 index 00000000000..e79122de24d --- /dev/null +++ b/Src/Base/AMReX_CTOParallelForImpl.H @@ -0,0 +1,331 @@ +#ifndef AMREX_CTO_PARALLEL_FOR_H_ +#define AMREX_CTO_PARALLEL_FOR_H_ + +#include +#include +#include + +#include +#include + +/* This header is not for the users to include directly. It's meant to be + * included in AMReX_GpuLaunch.H, which has included the headers needed + * here. */ + +/* Thank Maikel Nadolski and Alex Sinn for the techniques used here! */ + +namespace amrex { + +template +struct CompileTimeOptions { + // TypeList is defined in AMReX_Tuple.H + using list_type = TypeList...>; +}; + +#if (__cplusplus >= 201703L) + +//namespace meta +//{ + template + constexpr auto operator+ (TypeList, TypeList) { + return TypeList{}; + } + + template + constexpr auto single_product (TypeList, A) { + return TypeList{})...>{}; + } + + template + constexpr auto operator* (LLs, TypeList) { + return (TypeList<>{} + ... + single_product(LLs{}, As{})); + } + + template + constexpr auto cartesian_product_n (TypeList) { + return (TypeList>{} * ... 
* Ls{}); + } +//} + +namespace detail +{ + template + std::enable_if_t::value || std::is_same::value, bool> + ParallelFor_helper2 (T const& N, F&& f, TypeList, + std::array const& runtime_options) + { + if (runtime_options == std::array{As::value...}) { + if constexpr (std::is_integral::value) { + ParallelFor(N, [f] AMREX_GPU_DEVICE (T i) noexcept + { + f(i, As{}...); + }); + } else { + ParallelFor(N, [f] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + f(i, j, k, As{}...); + }); + } + return true; + } else { + return false; + } + } + + template + std::enable_if_t::value, bool> + ParallelFor_helper2 (Box const& box, T ncomp, F&& f, TypeList, + std::array const& runtime_options) + { + if (runtime_options == std::array{As::value...}) { + ParallelFor(box, ncomp, [f] AMREX_GPU_DEVICE (int i, int j, int k, T n) noexcept + { + f(i, j, k, n, As{}...); + }); + return true; + } else { + return false; + } + } + + template + std::enable_if_t::value || std::is_same::value> + ParallelFor_helper1 (T const& N, F&& f, TypeList, + RO const& runtime_options) + { + bool found_option = (false || ... || + ParallelFor_helper2(N, std::forward(f), + PPs{}, runtime_options)); + amrex::ignore_unused(found_option); + AMREX_ASSERT(found_option); + } + + template + std::enable_if_t::value> + ParallelFor_helper1 (Box const& box, T ncomp, F&& f, TypeList, + RO const& runtime_options) + { + bool found_option = (false || ... || + ParallelFor_helper2(box, ncomp, std::forward(f), + PPs{}, runtime_options)); + amrex::ignore_unused(found_option); + AMREX_ASSERT(found_option); + } +} + +#endif + +template +std::enable_if_t::value> +ParallelFor (TypeList /*list_of_compile_time_options*/, + std::array const& runtime_options, + T N, F&& f) +{ +#if (__cplusplus >= 201703L) + using OptionsListList = TypeList; + detail::ParallelFor_helper1(N, std::forward(f), + cartesian_product_n(OptionsListList{}), + runtime_options); +#else + amrex::ignore_unused(N, f, runtime_options); + static_assert(std::is_integral::value, "This requires C++17"); +#endif +} + +template +void ParallelFor (TypeList /*list_of_compile_time_options*/, + std::array const& runtime_options, + Box const& box, F&& f) +{ +#if (__cplusplus >= 201703L) + using OptionsListList = TypeList; + detail::ParallelFor_helper1(box, std::forward(f), + cartesian_product_n(OptionsListList{}), + runtime_options); +#else + amrex::ignore_unused(box, f, runtime_options); + static_assert(std::is_integral::value, "This requires C++17"); +#endif +} + +template +std::enable_if_t::value> +ParallelFor (TypeList /*list_of_compile_time_options*/, + std::array const& runtime_options, + Box const& box, T ncomp, F&& f) +{ +#if (__cplusplus >= 201703L) + using OptionsListList = TypeList; + detail::ParallelFor_helper1(box, ncomp, std::forward(f), + cartesian_product_n(OptionsListList{}), + runtime_options); +#else + amrex::ignore_unused(box, ncomp, f, runtime_options); + static_assert(std::is_integral::value, "This requires C++17"); +#endif +} + +/** + * \brief ParallelFor with compile time optimization of kernels with run time options. + * + * It uses fold expression to generate kernel launches for all combinations + * of the run time options. The kernel function can use constexpr if to + * discard unused code blocks for better run time performance. In the + * example below, the code will be expanded into 4*2=8 normal ParallelFors + * for all combinations of the run time parameters. 
+ \verbatim
+     int A_runtime_option = ...;
+     int B_runtime_option = ...;
+     enum A_options : int { A0, A1, A2, A3};
+     enum B_options : int { B0, B1 };
+     ParallelFor(TypeList,
+                 CompileTimeOptions>{},
+                 {A_runtime_option, B_runtime_option},
+                 N, [=] AMREX_GPU_DEVICE (int i, auto A_control, auto B_control)
+     {
+         ...
+         if constexpr (A_control.value == A0) {
+             ...
+         } else if constexpr (A_control.value == A1) {
+             ...
+         } else if constexpr (A_control.value == A2) {
+             ...
+         } else {
+             ...
+         }
+         if constexpr (A_control.value != A3 && B_control.value == B1) {
+             ...
+         }
+         ...
+     });
+ \endverbatim
+ * Note that due to a limitation of CUDA's extended device lambda, the
+ * constexpr if block cannot be the one that captures a variable first.
+ * If nvcc complains about it, you will have to manually capture it outside
+ * constexpr if. The data type for the parameters is int.
+ *
+ * \param ctos list of all possible values of the parameters.
+ * \param option the run time parameters.
+ * \param N an integer specifying the 1D for loop's range.
+ * \param f a callable object taking an integer and working on that iteration.
+ */
+template
+std::enable_if_t::value>
+ParallelFor (TypeList ctos,
+             std::array const& option,
+             T N, F&& f)
+{
+    ParallelFor(ctos, option, N, std::forward(f));
+}
+
+/**
+ * \brief ParallelFor with compile time optimization of kernels with run time options.
+ *
+ * It uses a fold expression to generate kernel launches for all combinations
+ * of the run time options. The kernel function can use constexpr if to
+ * discard unused code blocks for better run time performance. In the
+ * example below, the code will be expanded into 4*2=8 normal ParallelFors
+ * for all combinations of the run time parameters.
+ \verbatim
+     int A_runtime_option = ...;
+     int B_runtime_option = ...;
+     enum A_options : int { A0, A1, A2, A3};
+     enum B_options : int { B0, B1 };
+     ParallelFor(TypeList,
+                 CompileTimeOptions>{},
+                 {A_runtime_option, B_runtime_option},
+                 box, [=] AMREX_GPU_DEVICE (int i, int j, int k,
+                                            auto A_control, auto B_control)
+     {
+         ...
+         if constexpr (A_control.value == A0) {
+             ...
+         } else if constexpr (A_control.value == A1) {
+             ...
+         } else if constexpr (A_control.value == A2) {
+             ...
+         } else {
+             ...
+         }
+         if constexpr (A_control.value != A3 && B_control.value == B1) {
+             ...
+         }
+         ...
+     });
+ \endverbatim
+ * Note that due to a limitation of CUDA's extended device lambda, the
+ * constexpr if block cannot be the one that captures a variable first.
+ * If nvcc complains about it, you will have to manually capture it outside
+ * constexpr if. The data type for the parameters is int.
+ *
+ * \param ctos list of all possible values of the parameters.
+ * \param option the run time parameters.
+ * \param box a Box specifying the 3D for loop's range.
+ * \param f a callable object taking three integers and working on the given cell.
+ */
+template
+void ParallelFor (TypeList ctos,
+                  std::array const& option,
+                  Box const& box, F&& f)
+{
+    ParallelFor(ctos, option, box, std::forward(f));
+}
+
+/**
+ * \brief ParallelFor with compile time optimization of kernels with run time options.
+ *
+ * It uses a fold expression to generate kernel launches for all combinations
+ * of the run time options. The kernel function can use constexpr if to
+ * discard unused code blocks for better run time performance. In the
+ * example below, the code will be expanded into 4*2=8 normal ParallelFors
+ * for all combinations of the run time parameters.
+ \verbatim
+     int A_runtime_option = ...;
+     int B_runtime_option = ...;
+     enum A_options : int { A0, A1, A2, A3};
+     enum B_options : int { B0, B1 };
+     ParallelFor(TypeList,
+                 CompileTimeOptions>{},
+                 {A_runtime_option, B_runtime_option},
+                 box, ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n,
+                                                   auto A_control, auto B_control)
+     {
+         ...
+         if constexpr (A_control.value == A0) {
+             ...
+         } else if constexpr (A_control.value == A1) {
+             ...
+         } else if constexpr (A_control.value == A2) {
+             ...
+         } else {
+             ...
+         }
+         if constexpr (A_control.value != A3 && B_control.value == B1) {
+             ...
+         }
+         ...
+     });
+ \endverbatim
+ * Note that due to a limitation of CUDA's extended device lambda, the
+ * constexpr if block cannot be the one that captures a variable first.
+ * If nvcc complains about it, you will have to manually capture it outside
+ * constexpr if. The data type for the parameters is int.
+ *
+ * \param ctos list of all possible values of the parameters.
+ * \param option the run time parameters.
+ * \param box a Box specifying the iteration in 3D space.
+ * \param ncomp an integer specifying the range for iteration over components.
+ * \param f a callable object taking four integers and working on the given cell and component.
+ */
+template
+std::enable_if_t::value>
+ParallelFor (TypeList ctos,
+             std::array const& option,
+             Box const& box, T ncomp, F&& f)
+{
+    ParallelFor(ctos, option, box, ncomp, std::forward(f));
+}
+
+}
+
+#endif
diff --git a/Src/Base/AMReX_DistributionMapping.cpp b/Src/Base/AMReX_DistributionMapping.cpp
index a61d5b2f591..6b4c0c8925c 100644
--- a/Src/Base/AMReX_DistributionMapping.cpp
+++ b/Src/Base/AMReX_DistributionMapping.cpp
@@ -1300,7 +1300,7 @@ DistributionMapping::SFCProcessorMap (const BoxArray& boxes,
     for (int i = 0, N = boxes.size(); i < N; ++i)
     {
-        wgts.push_back(boxes[i].volume());
+        wgts.push_back(boxes[i].numPts());
     }
     SFCProcessorMapDoIt(boxes,wgts,nprocs);
@@ -1769,7 +1769,7 @@ DistributionMapping::makeSFC (const BoxArray& ba, bool use_box_vol, const int np
     {
         const Box& bx = ba[i];
         tokens.push_back(makeSFCToken(i, bx.smallEnd()));
-        const Long v = use_box_vol ? bx.volume() : Long(1);
+        const Long v = use_box_vol ?
bx.numPts() : Long(1); vol_sum += v; wgts.push_back(v); } diff --git a/Src/Base/AMReX_Extension.H b/Src/Base/AMReX_Extension.H index a084777f1a0..753b43995f3 100644 --- a/Src/Base/AMReX_Extension.H +++ b/Src/Base/AMReX_Extension.H @@ -57,7 +57,7 @@ #elif defined(__INTEL_COMPILER) #define AMREX_PRAGMA_SIMD _Pragma("ivdep") -#elif defined(_CRAYC) +#elif defined(_CRAYC) || defined(__cray__) #define AMREX_PRAGMA_SIMD _Pragma("ivdep") #elif defined(__PGI) @@ -73,7 +73,7 @@ #define AMREX_PRAGMA_SIMD _Pragma("ibm independent_loop") #elif defined(__clang__) -#define AMREX_PRAGMA_SIMD _Pragma("clang loop vectorize(enable)") +#define AMREX_PRAGMA_SIMD #elif defined(__GNUC__) #define AMREX_PRAGMA_SIMD _Pragma("GCC ivdep") diff --git a/Src/Base/AMReX_FArrayBox.H b/Src/Base/AMReX_FArrayBox.H index 3d3cda3674b..b678986c0e9 100644 --- a/Src/Base/AMReX_FArrayBox.H +++ b/Src/Base/AMReX_FArrayBox.H @@ -272,7 +272,7 @@ public: virtual ~FArrayBox () noexcept override {} FArrayBox (FArrayBox&& rhs) noexcept = default; - FArrayBox& operator= (FArrayBox&&) = default; + FArrayBox& operator= (FArrayBox&&) noexcept = default; FArrayBox (const FArrayBox&) = delete; FArrayBox& operator= (const FArrayBox&) = delete; diff --git a/Src/Base/AMReX_FBI.H b/Src/Base/AMReX_FBI.H index 61ef452b601..cc0bfeecbce 100644 --- a/Src/Base/AMReX_FBI.H +++ b/Src/Base/AMReX_FBI.H @@ -924,7 +924,7 @@ FabArray::pack_send_buffer_cpu (FabArray const& src, int scomp, int nc amrex::LoopConcurrentOnCpu( bx, ncomp, [=] (int ii, int jj, int kk, int n) noexcept { - pfab(ii,jj,kk,n) = sfab(ii,jj,kk,n+scomp); + pfab(ii,jj,kk,n) = static_cast(sfab(ii,jj,kk,n+scomp)); }); dptr += (bx.numPts() * ncomp * sizeof(BUF)); } diff --git a/Src/Base/AMReX_FabArray.H b/Src/Base/AMReX_FabArray.H index 6eef7caa579..736c39567ae 100644 --- a/Src/Base/AMReX_FabArray.H +++ b/Src/Base/AMReX_FabArray.H @@ -438,6 +438,15 @@ public: */ bool ok () const; + /** Has define() been called on this rank? + * + * \return true if `define` has been called on this `FabArray`. Note that all constructors except `FabArray ()` + * and `FabArray(Arena*a)` call `define`, even if the `MFInfo` argument has `alloc=false`. One could + * also use `FabArrayBase::empty()` to find whether `define` is called or not, although they are not exactly + * the same. + */ + bool isDefined () const; + //! Return a constant reference to the FAB associated with mfi. const FAB& operator[] (const MFIter& mfi) const noexcept { return *(this->fabPtr(mfi)); } @@ -1128,6 +1137,7 @@ protected: std::unique_ptr > m_factory; DataAllocator m_dallocator; + //! has define() been called? bool define_function_called = false; // @@ -1768,6 +1778,13 @@ FabArray::ok () const return isok == 1; } +template +bool +FabArray::isDefined () const +{ + return define_function_called; +} + template void FabArray::define (const BoxArray& bxs, @@ -2848,7 +2865,7 @@ FabArray::SumBoundary_nowait (int scomp, int ncomp, IntVect const& src_ngho FabArray* tmp = new FabArray( boxArray(), DistributionMap(), ncomp, src_nghost, MFInfo(), Factory() ); amrex::Copy(*tmp, *this, scomp, 0, ncomp, src_nghost); - this->setVal(0.0, scomp, ncomp, dst_nghost); + this->setVal(typename FAB::value_type(0), scomp, ncomp, dst_nghost); this->ParallelCopy_nowait(*tmp,0,scomp,ncomp,src_nghost,dst_nghost,period,FabArrayBase::ADD); // All local. Operation complete. 
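The machinery added in AMReX_CTOParallelForImpl.H above boils down to this: compare the runtime option values against every compile-time combination in a fold expression, and invoke the kernel with `std::integral_constant` arguments so that `if constexpr` can prune dead branches in each instantiation. A stripped-down, host-only model of that dispatch (C++17; `dispatch` is an illustrative name, not the AMReX API):

    #include <cassert>
    #include <utility>

    template <class F, int... As>
    void dispatch (int opt, F&& f, std::integer_sequence<int, As...>)
    {
        // one term per candidate; the matching one runs f with a
        // compile-time constant and short-circuits the rest
        bool found = (false || ... || (opt == As
            ? (f(std::integral_constant<int, As>{}), true) : false));
        assert(found); // mirrors AMREX_ASSERT(found_option) in the patch
    }

    int main ()
    {
        int result = 0;
        dispatch(2, [&] (auto A_control) {
            if constexpr (A_control.value == 2) { result = 20; } // kept branch
            else                                { result = -1; } // pruned here
        }, std::integer_sequence<int, 0, 1, 2, 3>{});
        assert(result == 20);
    }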
diff --git a/Src/Base/AMReX_FabArrayCommI.H b/Src/Base/AMReX_FabArrayCommI.H index c894fe0b2c7..3d3fe1743a2 100644 --- a/Src/Base/AMReX_FabArrayCommI.H +++ b/Src/Base/AMReX_FabArrayCommI.H @@ -10,7 +10,7 @@ FabArray::FBEP_nowait (int scomp, int ncomp, const IntVect& nghost, bool enforce_periodicity_only, bool override_sync) { - BL_PROFILE_SYNC_START_TIMED("SyncBeforeComms"); + BL_PROFILE_SYNC_START_TIMED("SyncBeforeComms: FB"); BL_PROFILE("FillBoundary_nowait()"); AMREX_ASSERT_WITH_MESSAGE(!fbd, "FillBoundary_nowait() called when comm operation already in progress."); @@ -316,7 +316,7 @@ FabArray::ParallelCopy_nowait (const FabArray& src, const FabArrayBase::CPC * a_cpc, bool to_ghost_cells_only) { - BL_PROFILE_SYNC_START_TIMED("SyncBeforeComms"); + BL_PROFILE_SYNC_START_TIMED("SyncBeforeComms: PC"); BL_PROFILE("FabArray::ParallelCopy_nowait()"); AMREX_ASSERT_WITH_MESSAGE(!pcd, "ParallelCopy_nowait() called when comm operation already in progress."); diff --git a/Src/Base/AMReX_Geometry.H b/Src/Base/AMReX_Geometry.H index 54a8b8630d3..890ec2e0f7e 100644 --- a/Src/Base/AMReX_Geometry.H +++ b/Src/Base/AMReX_Geometry.H @@ -67,6 +67,56 @@ public: int coord; }; + namespace detail { + template + T bisect_prob_lo (amrex::Real plo, amrex::Real /*phi*/, amrex::Real dxinv, int ilo, int ihi, amrex::Real tol) { + T lo = static_cast(plo + tol); + bool safe; + { + int i = int(Math::floor((lo - plo)*dxinv)) + ilo; + safe = i >= ilo && i <= ihi; + } + if (safe) { + return lo; + } else { + // bisect the point at which the cell no longer maps to inside the domain + T hi = static_cast(plo + 0.5_rt/dxinv); + T mid = bisect(lo, hi, + [=] AMREX_GPU_HOST_DEVICE (T x) -> T + { + int i = int(Math::floor((x - plo)*dxinv)) + ilo; + bool inside = i >= ilo && i <= ihi; + return static_cast(inside) - T(0.5); + }, static_cast(tol)); + return mid - static_cast(tol); + } + } + + template + T bisect_prob_hi (amrex::Real plo, amrex::Real phi, amrex::Real dxinv, int ilo, int ihi, amrex::Real tol) { + T hi = static_cast(phi - tol); + bool safe; + { + int i = int(Math::floor((hi - plo)*dxinv)) + ilo; + safe = i >= ilo && i <= ihi; + } + if (safe) { + return hi; + } else { + // bisect the point at which the cell no longer maps to inside the domain + T lo = static_cast(phi - 0.5_rt/dxinv); + T mid = bisect(lo, hi, + [=] AMREX_GPU_HOST_DEVICE (T x) -> T + { + int i = int(Math::floor((x - plo)*dxinv)) + ilo; + bool inside = i >= ilo && i <= ihi; + return static_cast(inside) - T(0.5); + }, static_cast(tol)); + return mid - static_cast(tol); + } + } + } + class Geometry : public CoordSys @@ -168,8 +218,6 @@ public: //! Returns the problem domain. const RealBox& ProbDomain () const noexcept { return prob_domain; } - //! Returns the roundoff domain. - const RealBox& RoundoffDomain () const noexcept { return roundoff_domain; } //! Sets the problem domain. 
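The `detail::bisect_prob_lo`/`bisect_prob_hi` helpers introduced in the AMReX_Geometry.H hunk above search for the last floating point coordinate that still floors into a valid cell index, so any position passing a plain inequality test against the stored bound is guaranteed to map into the domain. A scalar model of that bisection (all constants are illustrative):

    #include <cassert>
    #include <cmath>

    int main ()
    {
        const double plo = 0.0, dxinv = 10.0;  // 10 cells of width 0.1 on [0,1]
        const int ilo = 0, ihi = 9;
        // bracket the upper domain edge and bisect for the last inside point
        double lo = 0.95, hi = 1.05;
        while (hi - lo > 1e-12) {
            const double mid = 0.5*(lo + hi);
            const int i = int(std::floor((mid - plo)*dxinv)) + ilo;
            if (i >= ilo && i <= ihi) { lo = mid; } else { hi = mid; }
        }
        const double roundoff_hi = lo;  // plays the role of roundoff_hi_d
        const int i = int(std::floor((roundoff_hi - plo)*dxinv)) + ilo;
        assert(i >= ilo && i <= ihi);   // still maps to a valid cell
    }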
void ProbDomain (const RealBox& rb) noexcept { @@ -193,12 +241,19 @@ public: return {{AMREX_D_DECL(prob_domain.hi(0),prob_domain.hi(1),prob_domain.hi(2))}}; } - GpuArray RoundoffLoArray () const noexcept { - return {{AMREX_D_DECL(roundoff_domain.lo(0),roundoff_domain.lo(1),roundoff_domain.lo(2))}}; + GpuArray ProbLoArrayInParticleReal () const noexcept { +#ifdef AMREX_SINGLE_PRECISION_PARTICLES + return roundoff_lo_f; +#else + return roundoff_lo_d; +#endif } - - GpuArray RoundoffHiArray () const noexcept { - return {{AMREX_D_DECL(roundoff_domain.hi(0),roundoff_domain.hi(1),roundoff_domain.hi(2))}}; + GpuArray ProbHiArrayInParticleReal () const noexcept { +#ifdef AMREX_SINGLE_PRECISION_PARTICLES + return roundoff_hi_f; +#else + return roundoff_hi_d; +#endif } //! Returns the overall size of the domain by multiplying the ProbLength's together @@ -365,9 +420,13 @@ public: const Box& src, Vector& out) const noexcept; + //! Return domain box with non-periodic directions grown by ngrow. + Box growNonPeriodicDomain (IntVect const& ngrow) const noexcept; //! Return domain box with non-periodic directions grown by ngrow. Box growNonPeriodicDomain (int ngrow) const noexcept; //! Return domain box with periodic directions grown by ngrow. + Box growPeriodicDomain (IntVect const& ngrow) const noexcept; + //! Return domain box with periodic directions grown by ngrow. Box growPeriodicDomain (int ngrow) const noexcept; //! Set periodicity flags and return the old flags. @@ -406,7 +465,7 @@ public: * are sure to be mapped to cells inside the Domain() box. Note that * the same need not be true for all points inside ProbDomain(). */ - bool outsideRoundoffDomain (AMREX_D_DECL(Real x, Real y, Real z)) const; + bool outsideRoundoffDomain (AMREX_D_DECL(ParticleReal x, ParticleReal y, ParticleReal z)) const; /** * \brief Returns true if a point is inside the roundoff domain. @@ -414,7 +473,7 @@ public: * are sure to be mapped to cells inside the Domain() box. Note that * the same need not be true for all points inside ProbDomain(). */ - bool insideRoundoffDomain (AMREX_D_DECL(Real x, Real y, Real z)) const; + bool insideRoundoffDomain (AMREX_D_DECL(ParticleReal x, ParticleReal y, ParticleReal z)) const; /** * \brief Compute the roundoff domain. Public because it contains an @@ -430,10 +489,11 @@ private: RealBox prob_domain; // Due to round-off errors, not all floating point numbers for which plo >= x < phi - // will map to a cell that is inside "domain". "roundoff_domain" stores a phi - // that is very close to that in prob_domain, and for which all floating point numbers - // inside it according to a naive inequality check will map to a cell inside domain. - RealBox roundoff_domain; + // will map to a cell that is inside "domain". "roundoff_{lo,hi}_{f,d}" each store + // a position that is very close to that in prob_domain, and for which all doubles and floats less than + // it will map to a cell inside domain. 
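The new `ProbLoArrayInParticleReal`/`ProbHiArrayInParticleReal` accessors above exist because a bound computed in `double` can be unsafe when compared against a `float` particle position: rounding the bound to `float` may land it outside the valid range. Keeping separate `float` and `double` bounds and handing out the one that matches `ParticleReal` avoids that. A sketch of the pattern, with `SINGLE_PRECISION_PARTICLES` standing in for `AMREX_SINGLE_PRECISION_PARTICLES` and `GeomSketch` an illustrative stand-in:

    #include <array>

    #ifdef SINGLE_PRECISION_PARTICLES
    using ParticleReal = float;
    #else
    using ParticleReal = double;
    #endif

    struct GeomSketch {
        std::array<double,3> roundoff_lo_d{}, roundoff_hi_d{};
        std::array<float,3>  roundoff_lo_f{}, roundoff_hi_f{};

        // hand out the bounds matching the particle precision, so callers
        // never compare a double bound against a float position
        std::array<ParticleReal,3> ProbLoArrayInParticleReal () const {
    #ifdef SINGLE_PRECISION_PARTICLES
            return {roundoff_lo_f[0], roundoff_lo_f[1], roundoff_lo_f[2]};
    #else
            return {roundoff_lo_d[0], roundoff_lo_d[1], roundoff_lo_d[2]};
    #endif
        }
    };

    int main ()
    {
        GeomSketch g{};
        return int(g.ProbLoArrayInParticleReal()[0]); // 0
    }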
+ GpuArray roundoff_lo_d, roundoff_hi_d; + GpuArray roundoff_lo_f, roundoff_hi_f; // Box domain; diff --git a/Src/Base/AMReX_Geometry.cpp b/Src/Base/AMReX_Geometry.cpp index 395f17e352b..235c7bb7674 100644 --- a/Src/Base/AMReX_Geometry.cpp +++ b/Src/Base/AMReX_Geometry.cpp @@ -473,29 +473,41 @@ Geometry::periodicShift (const Box& target, } Box -Geometry::growNonPeriodicDomain (int ngrow) const noexcept +Geometry::growNonPeriodicDomain (IntVect const& ngrow) const noexcept { Box b = Domain(); for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { if (!isPeriodic(idim)) { - b.grow(idim,ngrow); + b.grow(idim,ngrow[idim]); } } return b; } Box -Geometry::growPeriodicDomain (int ngrow) const noexcept +Geometry::growPeriodicDomain (IntVect const& ngrow) const noexcept { Box b = Domain(); for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { if (isPeriodic(idim)) { - b.grow(idim,ngrow); + b.grow(idim,ngrow[idim]); } } return b; } +Box +Geometry::growNonPeriodicDomain (int ngrow) const noexcept +{ + return growNonPeriodicDomain(IntVect(ngrow)); +} + +Box +Geometry::growPeriodicDomain (int ngrow) const noexcept +{ + return growPeriodicDomain(IntVect(ngrow)); +} + void Geometry::computeRoundoffDomain () { @@ -506,50 +518,48 @@ Geometry::computeRoundoffDomain () inv_dx[k] = 1.0_rt/dx[k]; } - roundoff_domain = prob_domain; for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { int ilo = Domain().smallEnd(idim); int ihi = Domain().bigEnd(idim); Real plo = ProbLo(idim); Real phi = ProbHi(idim); - Real idx = InvCellSize(idim); + Real dxinv = InvCellSize(idim); Real deltax = CellSize(idim); -#ifdef AMREX_SINGLE_PRECISION_PARTICLES - Real tolerance = std::max(1.e-4_rt*deltax, 2.e-7_rt*phi); -#else - Real tolerance = std::max(1.e-8_rt*deltax, 1.e-14_rt*phi); -#endif - // bisect the point at which the cell no longer maps to inside the domain - Real lo = static_cast(phi) - Real(0.5)*static_cast(deltax); - Real hi = static_cast(phi) + Real(0.5)*static_cast(deltax); - - Real mid = bisect(lo, hi, - [=] AMREX_GPU_HOST_DEVICE (Real x) -> Real - { - int i = int(Math::floor((x - plo)*idx)) + ilo; - bool inside = i >= ilo && i <= ihi; - return static_cast(inside) - Real(0.5); - }, tolerance); - roundoff_domain.setHi(idim, mid - tolerance); + Real ftol = std::max(1.e-4_rt*deltax, 2.e-7_rt*phi); + Real dtol = std::max(1.e-8_rt*deltax, 1.e-14_rt*phi); + + roundoff_lo_f[idim] = detail::bisect_prob_lo (plo, phi, dxinv, ilo, ihi, ftol); + roundoff_lo_d[idim] = detail::bisect_prob_lo(plo, phi, dxinv, ilo, ihi, dtol); + roundoff_hi_f[idim] = detail::bisect_prob_hi (plo, phi, dxinv, ilo, ihi, ftol); + roundoff_hi_d[idim] = detail::bisect_prob_hi(plo, phi, dxinv, ilo, ihi, dtol); } } bool -Geometry::outsideRoundoffDomain (AMREX_D_DECL(Real x, Real y, Real z)) const +Geometry::outsideRoundoffDomain (AMREX_D_DECL(ParticleReal x, ParticleReal y, ParticleReal z)) const { - bool outside = AMREX_D_TERM(x < roundoff_domain.lo(0) - || x >= roundoff_domain.hi(0), - || y < roundoff_domain.lo(1) - || y >= roundoff_domain.hi(1), - || z < roundoff_domain.lo(2) - || z >= roundoff_domain.hi(2)); +#ifdef AMREX_SINGLE_PRECISION_PARTICLES + bool outside = AMREX_D_TERM(x < roundoff_lo_f[0] + || x >= roundoff_hi_f[0], + || y < roundoff_lo_f[1] + || y >= roundoff_hi_f[1], + || z < roundoff_lo_f[2] + || z >= roundoff_hi_f[2]); +#else + bool outside = AMREX_D_TERM(x < roundoff_lo_d[0] + || x >= roundoff_hi_d[0], + || y < roundoff_lo_d[1] + || y >= roundoff_hi_d[1], + || z < roundoff_lo_d[2] + || z >= roundoff_hi_d[2]); +#endif return outside; } bool 
-Geometry::insideRoundoffDomain (AMREX_D_DECL(Real x, Real y, Real z)) const +Geometry::insideRoundoffDomain (AMREX_D_DECL(ParticleReal x, ParticleReal y, ParticleReal z)) const { return !outsideRoundoffDomain(AMREX_D_DECL(x, y, z)); } diff --git a/Src/Base/AMReX_GpuAtomic.H b/Src/Base/AMReX_GpuAtomic.H index e6b2780abe0..a07704cb86b 100644 --- a/Src/Base/AMReX_GpuAtomic.H +++ b/Src/Base/AMReX_GpuAtomic.H @@ -30,15 +30,16 @@ namespace detail { { #if defined(__SYCL_DEVICE_ONLY__) constexpr auto mo = sycl::memory_order::relaxed; + constexpr auto ms = sycl::memory_scope::device; constexpr auto as = sycl::access::address_space::global_space; static_assert(sizeof(R) == sizeof(I), "sizeof R != sizeof I"); I* const add_as_I = reinterpret_cast(address); - sycl::atomic a{sycl::multi_ptr(add_as_I)}; - I old_I = a.load(mo), new_I; + sycl::atomic_ref a{*add_as_I}; + I old_I = a.load(), new_I; do { R const new_R = f(*(reinterpret_cast(&old_I)), val); new_I = *(reinterpret_cast(&new_R)); - } while (! a.compare_exchange_strong(old_I, new_I, mo)); + } while (! a.compare_exchange_strong(old_I, new_I)); return *(reinterpret_cast(&old_I)); #else R old = *address; @@ -53,17 +54,18 @@ namespace detail { { #if defined(__SYCL_DEVICE_ONLY__) constexpr auto mo = sycl::memory_order::relaxed; + constexpr auto ms = sycl::memory_scope::device; constexpr auto as = sycl::access::address_space::global_space; static_assert(sizeof(R) == sizeof(I), "sizeof R != sizeof I"); I* const add_as_I = reinterpret_cast(address); - sycl::atomic a{sycl::multi_ptr(add_as_I)}; - I old_I = a.load(mo), new_I; + sycl::atomic_ref a{*add_as_I}; + I old_I = a.load(), new_I; bool test_success; do { R const tmp = op(*(reinterpret_cast(&old_I)), val); new_I = *(reinterpret_cast(&tmp)); test_success = cond(tmp); - } while (test_success && ! a.compare_exchange_strong(old_I, new_I, mo)); + } while (test_success && ! 
a.compare_exchange_strong(old_I, new_I)); return test_success; #else R old = *address; @@ -131,9 +133,10 @@ namespace detail { return atomicAdd(sum, value); #elif defined(__SYCL_DEVICE_ONLY__) constexpr auto mo = sycl::memory_order::relaxed; + constexpr auto ms = sycl::memory_scope::device; constexpr auto as = sycl::access::address_space::global_space; - sycl::atomic a{sycl::multi_ptr(sum)}; - return a.fetch_add(value, mo); + sycl::atomic_ref a{*sum}; + return a.fetch_add(value); #else amrex::ignore_unused(sum, value); return T(); // should never get here, but have to return something @@ -313,9 +316,10 @@ namespace detail { return atomicMin(m, value); #elif defined(__SYCL_DEVICE_ONLY__) constexpr auto mo = sycl::memory_order::relaxed; + constexpr auto ms = sycl::memory_scope::device; constexpr auto as = sycl::access::address_space::global_space; - sycl::atomic a{sycl::multi_ptr(m)}; - return a.fetch_min(value, mo); + sycl::atomic_ref a{*m}; + return a.fetch_min(value); #else amrex::ignore_unused(m,value); return T(); // should never get here, but have to return something @@ -373,9 +377,10 @@ namespace detail { return atomicMax(m, value); #elif defined(__SYCL_DEVICE_ONLY__) constexpr auto mo = sycl::memory_order::relaxed; + constexpr auto ms = sycl::memory_scope::device; constexpr auto as = sycl::access::address_space::global_space; - sycl::atomic a{sycl::multi_ptr(m)}; - return a.fetch_max(value, mo); + sycl::atomic_ref a{*m}; + return a.fetch_max(value); #else amrex::ignore_unused(m,value); return T(); // should never get here, but have to return something @@ -430,9 +435,10 @@ namespace detail { return atomicOr(m, value); #elif defined(__SYCL_DEVICE_ONLY__) constexpr auto mo = sycl::memory_order::relaxed; + constexpr auto ms = sycl::memory_scope::device; constexpr auto as = sycl::access::address_space::global_space; - sycl::atomic a{sycl::multi_ptr(m)}; - return a.fetch_or(value, mo); + sycl::atomic_ref a{*m}; + return a.fetch_or(value); #else int const old = *m; *m = (*m) || value; @@ -451,9 +457,10 @@ namespace detail { return atomicAnd(m, value ? ~0x0 : 0); #elif defined(__SYCL_DEVICE_ONLY__) constexpr auto mo = sycl::memory_order::relaxed; + constexpr auto ms = sycl::memory_scope::device; constexpr auto as = sycl::access::address_space::global_space; - sycl::atomic a{sycl::multi_ptr(m)}; - return a.fetch_and(value ? ~0x0 : 0, mo); + sycl::atomic_ref a{*m}; + return a.fetch_and(value ? ~0x0 : 0); #else int const old = *m; *m = (*m) && value; @@ -472,11 +479,12 @@ namespace detail { { #if defined(__SYCL_DEVICE_ONLY__) constexpr auto mo = sycl::memory_order::relaxed; - sycl::atomic a{sycl::multi_ptr(m)}; - unsigned int oldi = a.load(mo), newi; + constexpr auto ms = sycl::memory_scope::device; + sycl::atomic_ref a{*m}; + unsigned int oldi = a.load(), newi; do { newi = (oldi >= value) ? 0u : (oldi+1u); - } while (! a.compare_exchange_strong(oldi, newi, mo)); + } while (! a.compare_exchange_strong(oldi, newi)); return oldi; #else auto const old = *m; @@ -509,12 +517,13 @@ namespace detail { return atomicDec(m, value); #elif defined(__SYCL_DEVICE_ONLY__) constexpr auto mo = sycl::memory_order::relaxed; + constexpr auto ms = sycl::memory_scope::device; constexpr auto as = sycl::access::address_space::global_space; - sycl::atomic a{sycl::multi_ptr(m)}; - unsigned int oldi = a.load(mo), newi; + sycl::atomic_ref a{*m}; + unsigned int oldi = a.load(), newi; do { newi = ((oldi == 0u) || (oldi > value)) ? value : (oldi-1u); - } while (! a.compare_exchange_strong(oldi, newi, mo)); + } while (! 
a.compare_exchange_strong(oldi, newi)); return oldi; #else auto const old = *m; @@ -535,9 +544,10 @@ namespace detail { return atomicExch(address, val); #elif defined(__SYCL_DEVICE_ONLY__) constexpr auto mo = sycl::memory_order::relaxed; + constexpr auto ms = sycl::memory_scope::device; constexpr auto as = sycl::access::address_space::global_space; - sycl::atomic a{sycl::multi_ptr(address)}; - return sycl::atomic_exchange(a, val, mo); + sycl::atomic_ref a{*address}; + return a.exchange(val); #else auto const old = *address; *address = val; @@ -557,9 +567,10 @@ namespace detail { return atomicCAS(address, compare, val); #elif defined(__SYCL_DEVICE_ONLY__) constexpr auto mo = sycl::memory_order::relaxed; + constexpr auto ms = sycl::memory_scope::device; constexpr auto as = sycl::access::address_space::global_space; - sycl::atomic a{sycl::multi_ptr(address)}; - a.compare_exchange_strong(compare, val, mo); + sycl::atomic_ref a{*address}; + a.compare_exchange_strong(compare, val); return compare; #else auto const old = *address; diff --git a/Src/Base/AMReX_GpuContainers.H b/Src/Base/AMReX_GpuContainers.H index cc68770ff3f..faccec1d2ef 100644 --- a/Src/Base/AMReX_GpuContainers.H +++ b/Src/Base/AMReX_GpuContainers.H @@ -19,13 +19,19 @@ namespace Gpu { /** * \brief A PODVector that uses the standard memory Arena. - * Note that, on NVIDIA architectures, this Arena is actually - * managed. - * + * Note that the memory might or might not be managed depending + * on the amrex.the_arena_is_managed ParmParse parameter. */ template using DeviceVector = PODVector >; + /** + * \brief A PODVector that uses the non-managed device memory arena. + * + */ + template + using NonManagedDeviceVector = PODVector >; + /** * \brief A PODVector that uses the managed memory arena. * @@ -83,6 +89,9 @@ namespace Gpu { template using HostVector = PODVector; + template + using NonManagedVector = PODVector; + template using ManagedVector = PODVector; diff --git a/Src/Base/AMReX_GpuDevice.H b/Src/Base/AMReX_GpuDevice.H index 8a327704a1d..a61ab4fe406 100644 --- a/Src/Base/AMReX_GpuDevice.H +++ b/Src/Base/AMReX_GpuDevice.H @@ -3,6 +3,7 @@ #include #include +#include #include #include #include @@ -148,9 +149,9 @@ public: // definition: https://github.com/llvm/llvm-project/blob/62ec4ac90738a5f2d209ed28c822223e58aaaeb7/clang/lib/Basic/Targets/AMDGPU.cpp#L400 // overview wavefront size: https://github.com/llvm/llvm-project/blob/efc063b621ea0c4d1e452bcade62f7fc7e1cc937/clang/test/Driver/amdgpu-macros.cl#L70-L115 // gfx10XX has 32 threads per wavefront else 64 - static constexpr int warp_size = __AMDGCN_WAVEFRONT_SIZE; + static AMREX_EXPORT constexpr int warp_size = __AMDGCN_WAVEFRONT_SIZE; # else - static constexpr int warp_size = AMREX_HIP_OR_CUDA_OR_DPCPP(64,32,16); + static AMREX_EXPORT constexpr int warp_size = AMREX_HIP_OR_CUDA_OR_DPCPP(64,32,16); # endif static unsigned int maxBlocksPerLaunch () noexcept { return max_blocks_per_launch; } @@ -166,28 +167,28 @@ private: static void initialize_gpu (); - static int device_id; - static int num_devices_used; - static int verbose; - static int max_gpu_streams; + static AMREX_EXPORT int device_id; + static AMREX_EXPORT int num_devices_used; + static AMREX_EXPORT int verbose; + static AMREX_EXPORT int max_gpu_streams; #ifdef AMREX_USE_GPU - static dim3 numThreadsMin; - static dim3 numBlocksOverride, numThreadsOverride; + static AMREX_EXPORT dim3 numThreadsMin; + static AMREX_EXPORT dim3 numBlocksOverride, numThreadsOverride; // We build gpu_default_stream and gpu_stream_pool. 
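One more note on the AMReX_GpuAtomic.H hunks above: they port the SYCL side from the deprecated `sycl::atomic` to `sycl::atomic_ref`, but the retry loop they wrap is unchanged, the standard compare-and-swap pattern for applying an arbitrary operation atomically. The same loop in portable `std::atomic` form (`fetch_op` is an illustrative name):

    #include <atomic>
    #include <cassert>

    template <class T, class Op>
    T fetch_op (std::atomic<T>& a, T val, Op op)
    {
        T old = a.load(std::memory_order_relaxed);
        T desired;
        do {
            desired = op(old, val);  // recompute from the latest observed value
        } while (!a.compare_exchange_strong(old, desired,
                                            std::memory_order_relaxed));
        return old;                  // value before the update, as the GPU APIs do
    }

    int main ()
    {
        std::atomic<double> x{2.0};
        double prev = fetch_op(x, 3.0, [] (double a, double b) { return a*b; });
        assert(prev == 2.0 && x.load() == 6.0);
    }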
// The non-owning gpu_stream is used to store the current stream that will be used. // gpu_stream is a vector so that it's thread safe to write to it. - static gpuStream_t gpu_default_stream; - static Vector gpu_stream_pool; // The size of this is max_gpu_stream - static Vector gpu_stream; // The size of this is omp_max_threads - static gpuDeviceProp_t device_prop; - static int memory_pools_supported; - static unsigned int max_blocks_per_launch; + static AMREX_EXPORT gpuStream_t gpu_default_stream; + static AMREX_EXPORT Vector gpu_stream_pool; // The size of this is max_gpu_stream + static AMREX_EXPORT Vector gpu_stream; // The size of this is omp_max_threads + static AMREX_EXPORT gpuDeviceProp_t device_prop; + static AMREX_EXPORT int memory_pools_supported; + static AMREX_EXPORT unsigned int max_blocks_per_launch; #ifdef AMREX_USE_DPCPP - static std::unique_ptr sycl_context; - static std::unique_ptr sycl_device; + static AMREX_EXPORT std::unique_ptr sycl_context; + static AMREX_EXPORT std::unique_ptr sycl_device; #endif #endif }; diff --git a/Src/Base/AMReX_GpuDevice.cpp b/Src/Base/AMReX_GpuDevice.cpp index 8d42363f0a7..fe7257ea971 100644 --- a/Src/Base/AMReX_GpuDevice.cpp +++ b/Src/Base/AMReX_GpuDevice.cpp @@ -22,9 +22,9 @@ #if defined(AMREX_USE_HIP) #include #if defined(AMREX_USE_ROCTX) -#include +#include #if defined(AMREX_PROFILING) || defined (AMREX_TINY_PROFILING) -#include +#include #endif #endif #endif @@ -397,11 +397,7 @@ Device::initialize_gpu () // check compute capability - if (sizeof(Real) == 8) { - AMREX_HIP_SAFE_CALL(hipDeviceSetSharedMemConfig(hipSharedMemBankSizeEightByte)); - } else if (sizeof(Real) == 4) { - AMREX_HIP_SAFE_CALL(hipDeviceSetSharedMemConfig(hipSharedMemBankSizeFourByte)); - } + // AMD devices do not support shared cache banking. AMREX_HIP_SAFE_CALL(hipStreamCreate(&gpu_default_stream)); for (int i = 0; i < max_gpu_streams; ++i) { @@ -467,8 +463,8 @@ Device::initialize_gpu () device_prop.warpSize = warp_size; auto sgss = d.get_info(); device_prop.maxMemAllocSize = d.get_info(); - device_prop.managedMemory = d.get_info(); - device_prop.concurrentManagedAccess = d.get_info(); + device_prop.managedMemory = d.has(sycl::aspect::usm_host_allocations); + device_prop.concurrentManagedAccess = d.has(sycl::aspect::usm_shared_allocations); device_prop.maxParameterSize = d.get_info(); { amrex::Print() << "Device Properties:\n" diff --git a/Src/Base/AMReX_GpuLaunch.H b/Src/Base/AMReX_GpuLaunch.H index d31bae568c1..7e877140629 100644 --- a/Src/Base/AMReX_GpuLaunch.H +++ b/Src/Base/AMReX_GpuLaunch.H @@ -30,11 +30,11 @@ #define AMREX_GPU_Z_STRIDE 1 #ifdef AMREX_USE_CUDA -# define AMREX_LAUNCH_KERNEL(blocks, threads, sharedMem, stream, ... ) \ - amrex::launch_global<<>>(__VA_ARGS__); +# define AMREX_LAUNCH_KERNEL(MT, blocks, threads, sharedMem, stream, ... ) \ + amrex::launch_global<<>>(__VA_ARGS__) #elif defined(AMREX_USE_HIP) -# define AMREX_LAUNCH_KERNEL(blocks, threads, sharedMem, stream, ... ) \ - hipLaunchKernelGGL(launch_global, blocks, threads, sharedMem, stream, __VA_ARGS__); +# define AMREX_LAUNCH_KERNEL(MT, blocks, threads, sharedMem, stream, ... 
) \ + hipLaunchKernelGGL(launch_global, blocks, threads, sharedMem, stream, __VA_ARGS__) #endif @@ -151,6 +151,28 @@ namespace Gpu { dim3 numThreads; std::size_t sharedMem = 0; }; + + template + ExecutionConfig + makeExecutionConfig (Long N) noexcept + { + ExecutionConfig ec(dim3{}, dim3{}); + ec.numBlocks.x = (std::max(N,Long(1)) + MT - 1) / MT; + ec.numThreads.x = MT; + AMREX_ASSERT(MT % Gpu::Device::warp_size == 0); + return ec; + } + + template + ExecutionConfig + makeExecutionConfig (const Box& box) noexcept + { + ExecutionConfig ec(dim3{}, dim3{}); + ec.numBlocks.x = (std::max(box.numPts(),Long(1)) + MT - 1) / MT; + ec.numThreads.x = MT; + AMREX_ASSERT(MT % Gpu::Device::warp_size == 0); + return ec; + } #endif } @@ -221,6 +243,8 @@ namespace Gpu { #ifdef AMREX_USE_GPU +#ifndef AMREX_USE_DPCPP + #define AMREX_HOST_DEVICE_PARALLEL_FOR_1D_FLAG(where_to_run,n,i,block) \ { using amrex_i_inttype = typename std::remove_const::type; \ if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \ @@ -344,6 +368,111 @@ namespace Gpu { block3; \ } +#else +// xxxxx DPCPP todo: host disabled in host device + +#define AMREX_HOST_DEVICE_PARALLEL_FOR_1D_FLAG(where_to_run,n,i,block) \ + { using amrex_i_inttype = typename std::remove_const::type; \ + if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \ + { \ + amrex::ParallelFor(n, [=] AMREX_GPU_DEVICE (amrex_i_inttype i) noexcept \ + block \ + ); \ + } \ + else { \ + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \ + }} + +#define AMREX_HOST_DEVICE_PARALLEL_FOR_3D_FLAG(where_to_run,box,i,j,k,block) \ + if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \ + { \ + amrex::ParallelFor(box, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept \ + block \ + ); \ + } \ + else { \ + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \ + } + +#define AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(where_to_run,box,nc,i,j,k,n,block) \ + if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \ + { \ + amrex::ParallelFor(box, nc, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept \ + block \ + ); \ + } \ + else { \ + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \ + } + +#define AMREX_HOST_DEVICE_FOR_1D_FLAG(where_to_run,n,i,block) \ + { using amrex_i_inttype = typename std::remove_const::type; \ + if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \ + { \ + amrex::ParallelFor(n, [=] AMREX_GPU_DEVICE (amrex_i_inttype i) noexcept \ + block \ + ); \ + } \ + else { \ + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \ + }} + +#define AMREX_HOST_DEVICE_FOR_3D_FLAG(where_to_run,box,i,j,k,block) \ + if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \ + { \ + amrex::ParallelFor(box, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept \ + block \ + ); \ + } \ + else { \ + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \ + } + +#define AMREX_HOST_DEVICE_FOR_4D_FLAG(where_to_run,box,nc,i,j,k,n,block) \ + if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \ + { \ + amrex::ParallelFor(box, nc, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept \ + block \ + ); \ + } \ + else { \ + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. 
It takes too long to compile"); \ + } + +#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_FLAG(where_to_run,box,tbox,block) \ + if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \ + { \ + AMREX_LAUNCH_DEVICE_LAMBDA(box,tbox,block); \ + } else { \ + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \ + } + +#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_FLAG(where_to_run,bx1,tbx1,block1) \ + if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \ + { \ + AMREX_LAUNCH_DEVICE_LAMBDA(bx1,tbx1,block1); \ + } else { \ + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \ + } + +#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_2_FLAG(where_to_run,bx1,tbx1,block1,bx2,tbx2,block2) \ + if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \ + { \ + AMREX_LAUNCH_DEVICE_LAMBDA(bx1,tbx1,block1,bx2,tbx2,block2); \ + } else { \ + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \ + } + +#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_3_FLAG(where_to_run,bx1,tbx1,block1,bx2,tbx2,block2,bx3,tbx3,block3) \ + if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \ + { \ + AMREX_LAUNCH_DEVICE_LAMBDA(bx1,tbx1,block1,bx2,tbx2,block2,bx3,tbx3,block3); \ + } else { \ + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \ + } + +#endif + #else #define AMREX_HOST_DEVICE_PARALLEL_FOR_1D_FLAG(where_to_run,n,i,block) \ @@ -421,4 +550,6 @@ namespace Gpu { #endif +#include + #endif diff --git a/Src/Base/AMReX_GpuLaunchFunctsC.H b/Src/Base/AMReX_GpuLaunchFunctsC.H index 025b43fec0a..6ce9cca0f3a 100644 --- a/Src/Base/AMReX_GpuLaunchFunctsC.H +++ b/Src/Base/AMReX_GpuLaunchFunctsC.H @@ -55,11 +55,18 @@ namespace detail { } template -void launch (T const& n, L&& f, std::size_t /*shared_mem_bytes*/=0) noexcept +void launch (T const& n, L&& f) noexcept { f(n); } +template +void launch (T const& n, L&& f) noexcept +{ + amrex::ignore_unused(MT); + f(n); +} + template ::value> > void For (T n, L&& f) noexcept { @@ -68,12 +75,26 @@ void For (T n, L&& f) noexcept } } +template ::value> > +void For (T n, L&& f) noexcept +{ + amrex::ignore_unused(MT); + For(n, std::forward(f)); +} + template ::value> > void For (Gpu::KernelInfo const&, T n, L&& f) noexcept { For(n, std::forward(f)); } +template ::value> > +void For (Gpu::KernelInfo const&, T n, L&& f) noexcept +{ + amrex::ignore_unused(MT); + For(n, std::forward(f)); +} + template ::value> > void ParallelFor (T n, L&& f) noexcept { @@ -83,12 +104,26 @@ void ParallelFor (T n, L&& f) noexcept } } +template ::value> > +void ParallelFor (T n, L&& f) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(n, std::forward(f)); +} + template ::value> > void ParallelFor (Gpu::KernelInfo const&, T n, L&& f) noexcept { ParallelFor(n, std::forward(f)); } +template ::value> > +void ParallelFor (Gpu::KernelInfo const&, T n, L&& f) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(n, std::forward(f)); +} + template void For (Box const& box, L&& f) noexcept { @@ -101,12 +136,26 @@ void For (Box const& box, L&& f) noexcept }}} } +template +void For (Box const& box, L&& f) noexcept +{ + amrex::ignore_unused(MT); + For(box, std::forward(f)); +} + template void For (Gpu::KernelInfo const&, Box const& box, L&& f) noexcept { For(box, std::forward(f)); } +template +void For (Gpu::KernelInfo const&, Box const& box, L&& f) noexcept +{ + amrex::ignore_unused(MT); + For(box, std::forward(f)); +} + template void ParallelFor (Box const& 
box, L&& f) noexcept { @@ -120,12 +169,26 @@ void ParallelFor (Box const& box, L&& f) noexcept }}} } +template +void ParallelFor (Box const& box, L&& f) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box, std::forward(f)); +} + template void ParallelFor (Gpu::KernelInfo const&, Box const& box, L&& f) noexcept { ParallelFor(box, std::forward(f)); } +template +void ParallelFor (Gpu::KernelInfo const&, Box const& box, L&& f) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box, std::forward(f)); +} + template ::value> > void For (Box const& box, T ncomp, L&& f) noexcept { @@ -140,12 +203,26 @@ void For (Box const& box, T ncomp, L&& f) noexcept } } +template ::value> > +void For (Box const& box, T ncomp, L&& f) noexcept +{ + amrex::ignore_unused(MT); + For(box, ncomp, std::forward(f)); +} + template ::value> > void For (Gpu::KernelInfo const&, Box const& box, T ncomp, L&& f) noexcept { For(box, ncomp, std::forward(f)); } +template ::value> > +void For (Gpu::KernelInfo const&, Box const& box, T ncomp, L&& f) noexcept +{ + amrex::ignore_unused(MT); + For(box, ncomp, std::forward(f)); +} + template ::value> > void ParallelFor (Box const& box, T ncomp, L&& f) noexcept { @@ -161,12 +238,26 @@ void ParallelFor (Box const& box, T ncomp, L&& f) noexcept } } +template ::value> > +void ParallelFor (Box const& box, T ncomp, L&& f) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box, ncomp, std::forward(f)); +} + template ::value> > void ParallelFor (Gpu::KernelInfo const&, Box const& box, T ncomp, L&& f) noexcept { ParallelFor(box, ncomp, std::forward(f)); } +template ::value> > +void ParallelFor (Gpu::KernelInfo const&, Box const& box, T ncomp, L&& f) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box, ncomp, std::forward(f)); +} + template void For (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept { @@ -174,12 +265,27 @@ void For (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept For(box2, std::forward(f2)); } +template +void For (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +{ + amrex::ignore_unused(MT); + For(box1, std::forward(f1)); + For(box2, std::forward(f2)); +} + template void For (Gpu::KernelInfo const&, Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept { For (box1, box2, std::forward(f1), std::forward(f2)); } +template +void For (Gpu::KernelInfo const&, Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +{ + amrex::ignore_unused(MT); + For (box1, box2, std::forward(f1), std::forward(f2)); +} + template void For (Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { @@ -188,12 +294,28 @@ void For (Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L For(box3, std::forward(f3)); } +template +void For (Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept +{ + amrex::ignore_unused(MT); + For(box1, std::forward(f1)); + For(box2, std::forward(f2)); + For(box3, std::forward(f3)); +} + template void For (Gpu::KernelInfo const&, Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { For(box1, box2, box3, std::forward(f1), std::forward(f2), std::forward(f3)); } +template +void For (Gpu::KernelInfo const&, Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept +{ + amrex::ignore_unused(MT); + For(box1, box2, box3, std::forward(f1), std::forward(f2), std::forward(f3)); +} + template ::value>, typename M2=std::enable_if_t::value> > @@ -204,6 +326,17 @@ void For (Box 
const& box1, T1 ncomp1, L1&& f1, For(box2, ncomp2, std::forward(f2)); } +template ::value>, + typename M2=std::enable_if_t::value> > +void For (Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2) noexcept +{ + amrex::ignore_unused(MT); + For(box1, ncomp1, std::forward(f1)); + For(box2, ncomp2, std::forward(f2)); +} + template ::value>, typename M2=std::enable_if_t::value> > @@ -214,6 +347,17 @@ void For (Gpu::KernelInfo const&, For(box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); } +template ::value>, + typename M2=std::enable_if_t::value> > +void For (Gpu::KernelInfo const&, + Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2) noexcept +{ + amrex::ignore_unused(MT); + For(box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); +} + template ::value>, typename M2=std::enable_if_t::value>, @@ -227,6 +371,20 @@ void For (Box const& box1, T1 ncomp1, L1&& f1, For(box3, ncomp3, std::forward(f3)); } +template ::value>, + typename M2=std::enable_if_t::value>, + typename M3=std::enable_if_t::value> > +void For (Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2, + Box const& box3, T3 ncomp3, L3&& f3) noexcept +{ + amrex::ignore_unused(MT); + For(box1, ncomp1, std::forward(f1)); + For(box2, ncomp2, std::forward(f2)); + For(box3, ncomp3, std::forward(f3)); +} + template ::value>, typename M2=std::enable_if_t::value>, @@ -241,6 +399,21 @@ void For (Gpu::KernelInfo const&, box3,ncomp3,std::forward(f3)); } +template ::value>, + typename M2=std::enable_if_t::value>, + typename M3=std::enable_if_t::value> > +void For (Gpu::KernelInfo const&, + Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2, + Box const& box3, T3 ncomp3, L3&& f3) noexcept +{ + amrex::ignore_unused(MT); + For(box1,ncomp1,std::forward(f1), + box2,ncomp2,std::forward(f2), + box3,ncomp3,std::forward(f3)); +} + template void ParallelFor (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept { @@ -248,12 +421,27 @@ void ParallelFor (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept ParallelFor(box2, std::forward(f2)); } +template +void ParallelFor (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box1, std::forward(f1)); + ParallelFor(box2, std::forward(f2)); +} + template void ParallelFor (Gpu::KernelInfo const&, Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept { ParallelFor(box1,box2,f1,f2); } +template +void ParallelFor (Gpu::KernelInfo const&, Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box1,box2,f1,f2); +} + template void ParallelFor (Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { @@ -262,12 +450,28 @@ void ParallelFor (Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2 ParallelFor(box3, std::forward(f3)); } +template +void ParallelFor (Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box1, std::forward(f1)); + ParallelFor(box2, std::forward(f2)); + ParallelFor(box3, std::forward(f3)); +} + template void ParallelFor (Gpu::KernelInfo const&, Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { ParallelFor(box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); } +template +void ParallelFor (Gpu::KernelInfo const&, Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept 
+{ + amrex::ignore_unused(MT); + ParallelFor(box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); +} + template ::value>, typename M2=std::enable_if_t::value> > @@ -278,6 +482,17 @@ void ParallelFor (Box const& box1, T1 ncomp1, L1&& f1, ParallelFor(box2, ncomp2, std::forward(f2)); } +template ::value>, + typename M2=std::enable_if_t::value> > +void ParallelFor (Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box1, ncomp1, std::forward(f1)); + ParallelFor(box2, ncomp2, std::forward(f2)); +} + template ::value>, typename M2=std::enable_if_t::value> > @@ -289,6 +504,18 @@ void ParallelFor (Gpu::KernelInfo const&, box2,ncomp2,std::forward(f2)); } +template ::value>, + typename M2=std::enable_if_t::value> > +void ParallelFor (Gpu::KernelInfo const&, + Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box1,ncomp1,std::forward(f1), + box2,ncomp2,std::forward(f2)); +} + template ::value>, typename M2=std::enable_if_t::value>, @@ -302,6 +529,20 @@ void ParallelFor (Box const& box1, T1 ncomp1, L1&& f1, ParallelFor(box3, ncomp3, std::forward(f3)); } +template ::value>, + typename M2=std::enable_if_t::value>, + typename M3=std::enable_if_t::value> > +void ParallelFor (Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2, + Box const& box3, T3 ncomp3, L3&& f3) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box1, ncomp1, std::forward(f1)); + ParallelFor(box2, ncomp2, std::forward(f2)); + ParallelFor(box3, ncomp3, std::forward(f3)); +} + template ::value>, typename M2=std::enable_if_t::value>, @@ -316,30 +557,73 @@ void ParallelFor (Gpu::KernelInfo const&, box3, ncomp3, std::forward(f3)); } +template ::value>, + typename M2=std::enable_if_t::value>, + typename M3=std::enable_if_t::value> > +void ParallelFor (Gpu::KernelInfo const&, + Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2, + Box const& box3, T3 ncomp3, L3&& f3) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box1, ncomp1, std::forward(f1), + box2, ncomp2, std::forward(f2), + box3, ncomp3, std::forward(f3)); +} + template ::value> > void HostDeviceParallelFor (T n, L&& f) noexcept { ParallelFor(n,std::forward(f)); } +template ::value> > +void HostDeviceParallelFor (T n, L&& f) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(n,std::forward(f)); +} + template void HostDeviceParallelFor (Box const& box, L&& f) noexcept { ParallelFor(box,std::forward(f)); } +template +void HostDeviceParallelFor (Box const& box, L&& f) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box,std::forward(f)); +} + template ::value> > void HostDeviceParallelFor (Box const& box, T ncomp, L&& f) noexcept { ParallelFor(box,ncomp,std::forward(f)); } +template ::value> > +void HostDeviceParallelFor (Box const& box, T ncomp, L&& f) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box,ncomp,std::forward(f)); +} + template void HostDeviceParallelFor (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept { ParallelFor(box1,box2,std::forward(f1),std::forward(f2)); } +template +void HostDeviceParallelFor (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box1,box2,std::forward(f1),std::forward(f2)); +} + template void HostDeviceParallelFor (Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept @@ -347,6 +631,14 @@ void 
HostDeviceParallelFor (Box const& box1, Box const& box2, Box const& box3, ParallelFor(box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); } +template +void HostDeviceParallelFor (Box const& box1, Box const& box2, Box const& box3, + L1&& f1, L2&& f2, L3&& f3) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); +} + template ::value>, typename M2=std::enable_if_t::value> > @@ -356,6 +648,16 @@ void HostDeviceParallelFor (Box const& box1, T1 ncomp1, L1&& f1, ParallelFor(box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); } +template ::value>, + typename M2=std::enable_if_t::value> > +void HostDeviceParallelFor (Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); +} + template ::value>, typename M2=std::enable_if_t::value>, @@ -369,30 +671,72 @@ void HostDeviceParallelFor (Box const& box1, T1 ncomp1, L1&& f1, box3,ncomp3,std::forward(f3)); } +template ::value>, + typename M2=std::enable_if_t::value>, + typename M3=std::enable_if_t::value> > +void HostDeviceParallelFor (Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2, + Box const& box3, T3 ncomp3, L3&& f3) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box1,ncomp1,std::forward(f1), + box2,ncomp2,std::forward(f2), + box3,ncomp3,std::forward(f3)); +} + template ::value> > void HostDeviceFor (T n, L&& f) noexcept { For(n,std::forward(f)); } +template ::value> > +void HostDeviceFor (T n, L&& f) noexcept +{ + amrex::ignore_unused(MT); + For(n,std::forward(f)); +} + template void HostDeviceFor (Box const& box, L&& f) noexcept { For(box,std::forward(f)); } +template +void HostDeviceFor (Box const& box, L&& f) noexcept +{ + amrex::ignore_unused(MT); + For(box,std::forward(f)); +} + template ::value> > void HostDeviceFor (Box const& box, T ncomp, L&& f) noexcept { For(box,ncomp,std::forward(f)); } +template ::value> > +void HostDeviceFor (Box const& box, T ncomp, L&& f) noexcept +{ + amrex::ignore_unused(MT); + For(box,ncomp,std::forward(f)); +} + template void HostDeviceFor (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept { For(box1,box2,std::forward(f1),std::forward(f2)); } +template +void HostDeviceFor (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +{ + amrex::ignore_unused(MT); + For(box1,box2,std::forward(f1),std::forward(f2)); +} + template void HostDeviceFor (Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept @@ -400,6 +744,14 @@ void HostDeviceFor (Box const& box1, Box const& box2, Box const& box3, For(box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); } +template +void HostDeviceFor (Box const& box1, Box const& box2, Box const& box3, + L1&& f1, L2&& f2, L3&& f3) noexcept +{ + amrex::ignore_unused(MT); + For(box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); +} + template ::value>, typename M2=std::enable_if_t::value> > @@ -409,6 +761,16 @@ void HostDeviceFor (Box const& box1, T1 ncomp1, L1&& f1, For(box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); } +template ::value>, + typename M2=std::enable_if_t::value> > +void HostDeviceFor (Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2) noexcept +{ + amrex::ignore_unused(MT); + For(box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); +} + template ::value>, typename M2=std::enable_if_t::value>, @@ 
-422,30 +784,72 @@ void HostDeviceFor (Box const& box1, T1 ncomp1, L1&& f1, box3,ncomp3,std::forward(f3)); } +template ::value>, + typename M2=std::enable_if_t::value>, + typename M3=std::enable_if_t::value> > +void HostDeviceFor (Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2, + Box const& box3, T3 ncomp3, L3&& f3) noexcept +{ + amrex::ignore_unused(MT); + For(box1,ncomp1,std::forward(f1), + box2,ncomp2,std::forward(f2), + box3,ncomp3,std::forward(f3)); +} + template ::value> > void HostDeviceParallelFor (Gpu::KernelInfo const&, T n, L&& f) noexcept { ParallelFor(n,std::forward(f)); } +template ::value> > +void HostDeviceParallelFor (Gpu::KernelInfo const&, T n, L&& f) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(n,std::forward(f)); +} + template void HostDeviceParallelFor (Gpu::KernelInfo const&, Box const& box, L&& f) noexcept { ParallelFor(box,std::forward(f)); } +template +void HostDeviceParallelFor (Gpu::KernelInfo const&, Box const& box, L&& f) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box,std::forward(f)); +} + template ::value> > void HostDeviceParallelFor (Gpu::KernelInfo const&, Box const& box, T ncomp, L&& f) noexcept { ParallelFor(box,ncomp,std::forward(f)); } +template ::value> > +void HostDeviceParallelFor (Gpu::KernelInfo const&, Box const& box, T ncomp, L&& f) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box,ncomp,std::forward(f)); +} + template void HostDeviceParallelFor (Gpu::KernelInfo const&, Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept { ParallelFor(box1,box2,std::forward(f1),std::forward(f2)); } +template +void HostDeviceParallelFor (Gpu::KernelInfo const&, Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box1,box2,std::forward(f1),std::forward(f2)); +} + template void HostDeviceParallelFor (Gpu::KernelInfo const&, Box const& box1, Box const& box2, Box const& box3, @@ -454,6 +858,15 @@ void HostDeviceParallelFor (Gpu::KernelInfo const&, ParallelFor(box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); } +template +void HostDeviceParallelFor (Gpu::KernelInfo const&, + Box const& box1, Box const& box2, Box const& box3, + L1&& f1, L2&& f2, L3&& f3) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); +} + template ::value>, typename M2=std::enable_if_t::value> > @@ -464,6 +877,17 @@ void HostDeviceParallelFor (Gpu::KernelInfo const&, ParallelFor(box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); } +template ::value>, + typename M2=std::enable_if_t::value> > +void HostDeviceParallelFor (Gpu::KernelInfo const&, + Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); +} + template ::value>, typename M2=std::enable_if_t::value>, @@ -478,30 +902,73 @@ void HostDeviceParallelFor (Gpu::KernelInfo const&, box3,ncomp3,std::forward(f3)); } +template ::value>, + typename M2=std::enable_if_t::value>, + typename M3=std::enable_if_t::value> > +void HostDeviceParallelFor (Gpu::KernelInfo const&, + Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2, + Box const& box3, T3 ncomp3, L3&& f3) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box1,ncomp1,std::forward(f1), + box2,ncomp2,std::forward(f2), + box3,ncomp3,std::forward(f3)); +} + template ::value> > void HostDeviceFor (Gpu::KernelInfo const&, T n, L&& 
f) noexcept { For(n,std::forward(f)); } +template ::value> > +void HostDeviceFor (Gpu::KernelInfo const&, T n, L&& f) noexcept +{ + amrex::ignore_unused(MT); + For(n,std::forward(f)); +} + template void HostDeviceFor (Gpu::KernelInfo const&, Box const& box, L&& f) noexcept { For(box,std::forward(f)); } +template +void HostDeviceFor (Gpu::KernelInfo const&, Box const& box, L&& f) noexcept +{ + amrex::ignore_unused(MT); + For(box,std::forward(f)); +} + template ::value> > void HostDeviceFor (Gpu::KernelInfo const&, Box const& box, T ncomp, L&& f) noexcept { For(box,ncomp,std::forward(f)); } +template ::value> > +void HostDeviceFor (Gpu::KernelInfo const&, Box const& box, T ncomp, L&& f) noexcept +{ + amrex::ignore_unused(MT); + For(box,ncomp,std::forward(f)); +} + template void HostDeviceFor (Gpu::KernelInfo const&, Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept { For(box1,box2,std::forward(f1),std::forward(f2)); } +template +void HostDeviceFor (Gpu::KernelInfo const&, Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +{ + amrex::ignore_unused(MT); + For(box1,box2,std::forward(f1),std::forward(f2)); +} + template void HostDeviceFor (Gpu::KernelInfo const&, Box const& box1, Box const& box2, Box const& box3, @@ -510,6 +977,15 @@ void HostDeviceFor (Gpu::KernelInfo const&, For(box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); } +template +void HostDeviceFor (Gpu::KernelInfo const&, + Box const& box1, Box const& box2, Box const& box3, + L1&& f1, L2&& f2, L3&& f3) noexcept +{ + amrex::ignore_unused(MT); + For(box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); +} + template ::value>, typename M2=std::enable_if_t::value> > @@ -520,6 +996,17 @@ void HostDeviceFor (Gpu::KernelInfo const&, For(box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); } +template ::value>, + typename M2=std::enable_if_t::value> > +void HostDeviceFor (Gpu::KernelInfo const&, + Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2) noexcept +{ + amrex::ignore_unused(MT); + For(box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); +} + template ::value>, typename M2=std::enable_if_t::value>, @@ -534,6 +1021,21 @@ void HostDeviceFor (Gpu::KernelInfo const&, box3,ncomp3,std::forward(f3)); } +template ::value>, + typename M2=std::enable_if_t::value>, + typename M3=std::enable_if_t::value> > +void HostDeviceFor (Gpu::KernelInfo const&, + Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2, + Box const& box3, T3 ncomp3, L3&& f3) noexcept +{ + amrex::ignore_unused(MT); + For(box1,ncomp1,std::forward(f1), + box2,ncomp2,std::forward(f2), + box3,ncomp3,std::forward(f3)); +} + template ::value> > void ParallelForRNG (T n, L&& f) noexcept { diff --git a/Src/Base/AMReX_GpuLaunchFunctsG.H b/Src/Base/AMReX_GpuLaunchFunctsG.H index 12206f69b70..7940b5589a0 100644 --- a/Src/Base/AMReX_GpuLaunchFunctsG.H +++ b/Src/Base/AMReX_GpuLaunchFunctsG.H @@ -64,11 +64,24 @@ void launch (int nblocks, int nthreads_per_block, gpuStream_t stream, L&& f) noe } } -template +template +void launch (int nblocks, std::size_t shared_mem_bytes, gpuStream_t stream, + L&& f) noexcept +{ + launch(nblocks, MT, shared_mem_bytes, stream, std::forward(f)); +} + +template +void launch (int nblocks, gpuStream_t stream, L&& f) noexcept +{ + launch(nblocks, MT, stream, std::forward(f)); +} + +template void launch (T const& n, L&& f) noexcept { if (amrex::isEmpty(n)) return; - const auto ec = Gpu::ExecutionConfig(n); + const auto ec = Gpu::makeExecutionConfig(n); 
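// ============================================================================
// Sketch: what Gpu::makeExecutionConfig<MT>(n), introduced above in place of
// Gpu::ExecutionConfig(n), plausibly computes: a block count derived from the
// element count and the compile-time max block size MT. SketchConfig is a
// made-up stand-in; the real helper also honors device limits, so treat the
// arithmetic as an assumption.
#include <cstdio>

template <int MT>
struct SketchConfig {
    long numBlocks;
    int  numThreads;
    explicit SketchConfig (long n)
        : numBlocks((n + MT - 1) / MT),   // ceil(n / MT) blocks
          numThreads(MT) {}
};

int main ()
{
    SketchConfig<128> ec(1000);
    std::printf("%ld blocks x %d threads\n", ec.numBlocks, ec.numThreads);
}
// ============================================================================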
int nthreads_per_block = ec.numThreads.x; int nthreads_total = nthreads_per_block * ec.numBlocks.x; auto& q = Gpu::Device::streamQueue(); @@ -139,11 +152,11 @@ namespace detail { } } -template ::value> > +template ::value> > void ParallelFor (Gpu::KernelInfo const& info, T n, L&& f) noexcept { if (amrex::isEmpty(n)) return; - const auto ec = Gpu::ExecutionConfig(n); + const auto ec = Gpu::makeExecutionConfig(n); int nthreads_per_block = ec.numThreads.x; int nthreads_total = nthreads_per_block * ec.numBlocks.x; auto& q = Gpu::Device::streamQueue(); @@ -186,7 +199,7 @@ void ParallelFor (Gpu::KernelInfo const& info, T n, L&& f) noexcept } } -template +template void ParallelFor (Gpu::KernelInfo const& info, Box const& box, L&& f) noexcept { if (amrex::isEmpty(box)) return; @@ -195,7 +208,7 @@ void ParallelFor (Gpu::KernelInfo const& info, Box const& box, L&& f) noexcept const auto len = amrex::length(box); const auto lenxy = len.x*len.y; const auto lenx = len.x; - const auto ec = Gpu::ExecutionConfig(ncells); + const auto ec = Gpu::makeExecutionConfig(ncells); int nthreads_per_block = ec.numThreads.x; int nthreads_total = nthreads_per_block * ec.numBlocks.x; auto& q = Gpu::Device::streamQueue(); @@ -250,7 +263,7 @@ void ParallelFor (Gpu::KernelInfo const& info, Box const& box, L&& f) noexcept } } -template ::value> > +template ::value> > void ParallelFor (Gpu::KernelInfo const& info, Box const& box, T ncomp, L&& f) noexcept { if (amrex::isEmpty(box)) return; @@ -259,7 +272,7 @@ void ParallelFor (Gpu::KernelInfo const& info, Box const& box, T ncomp, L&& f) n const auto len = amrex::length(box); const auto lenxy = len.x*len.y; const auto lenx = len.x; - const auto ec = Gpu::ExecutionConfig(ncells); + const auto ec = Gpu::makeExecutionConfig(ncells); int nthreads_per_block = ec.numThreads.x; int nthreads_total = nthreads_per_block * ec.numBlocks.x; auto& q = Gpu::Device::streamQueue(); @@ -437,7 +450,7 @@ void ParallelForRNG (Box const& box, T ncomp, L&& f) noexcept } } -template +template void ParallelFor (Gpu::KernelInfo const& /*info*/, Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept { if (amrex::isEmpty(box1) && amrex::isEmpty(box2)) return; @@ -452,7 +465,7 @@ void ParallelFor (Gpu::KernelInfo const& /*info*/, Box const& box1, Box const& b const auto len2xy = len2.x*len2.y; const auto len1x = len1.x; const auto len2x = len2.x; - const auto ec = Gpu::ExecutionConfig(ncells); + const auto ec = Gpu::makeExecutionConfig(ncells); int nthreads_per_block = ec.numThreads.x; int nthreads_total = nthreads_per_block * ec.numBlocks.x; auto& q = Gpu::Device::streamQueue(); @@ -491,7 +504,7 @@ void ParallelFor (Gpu::KernelInfo const& /*info*/, Box const& box1, Box const& b } } -template +template void ParallelFor (Gpu::KernelInfo const& /*info*/, Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept @@ -513,7 +526,7 @@ void ParallelFor (Gpu::KernelInfo const& /*info*/, const auto len1x = len1.x; const auto len2x = len2.x; const auto len3x = len3.x; - const auto ec = Gpu::ExecutionConfig(ncells); + const auto ec = Gpu::makeExecutionConfig(ncells); int nthreads_per_block = ec.numThreads.x; int nthreads_total = nthreads_per_block * ec.numBlocks.x; auto& q = Gpu::Device::streamQueue(); @@ -561,7 +574,7 @@ void ParallelFor (Gpu::KernelInfo const& /*info*/, } } -template ::value>, typename M2=std::enable_if_t::value> > void ParallelFor (Gpu::KernelInfo const& /*info*/, @@ -580,7 +593,7 @@ void ParallelFor (Gpu::KernelInfo const& /*info*/, const auto len2xy = 
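// ============================================================================
// Sketch: the lenx/lenxy strides computed above exist so the kernel can turn
// a flat cell index back into (i,j,k). Host-side check of that arithmetic,
// reconstructed from the strides; the lo[]/len[] values are illustrative.
#include <cassert>

int main ()
{
    const int lo[3]  = {4, -2, 7};   // box lower corner
    const int len[3] = {8, 5, 3};    // box extents
    const int lenx = len[0], lenxy = len[0]*len[1];
    const int ncells = len[0]*len[1]*len[2];
    for (int icell = 0; icell < ncells; ++icell) {
        int k =  icell /  lenxy;
        int j = (icell - k*lenxy) / lenx;
        int i = (icell - k*lenxy) - j*lenx;
        i += lo[0]; j += lo[1]; k += lo[2];
        // mapping back must reproduce the flat index
        assert((k-lo[2])*lenxy + (j-lo[1])*lenx + (i-lo[0]) == icell);
    }
}
// ============================================================================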
len2.x*len2.y; const auto len1x = len1.x; const auto len2x = len2.x; - const auto ec = Gpu::ExecutionConfig(ncells); + const auto ec = Gpu::makeExecutionConfig(ncells); int nthreads_per_block = ec.numThreads.x; int nthreads_total = nthreads_per_block * ec.numBlocks.x; auto& q = Gpu::Device::streamQueue(); @@ -623,7 +636,7 @@ void ParallelFor (Gpu::KernelInfo const& /*info*/, } } -template ::value>, typename M2=std::enable_if_t::value>, typename M3=std::enable_if_t::value> > @@ -649,7 +662,7 @@ void ParallelFor (Gpu::KernelInfo const& /*info*/, const auto len1x = len1.x; const auto len2x = len2.x; const auto len3x = len3.x; - const auto ec = Gpu::ExecutionConfig(ncells); + const auto ec = Gpu::makeExecutionConfig(ncells); int nthreads_per_block = ec.numThreads.x; int nthreads_total = nthreads_per_block * ec.numBlocks.x; auto& q = Gpu::Device::streamQueue(); @@ -709,16 +722,34 @@ void ParallelFor (Gpu::KernelInfo const& /*info*/, template void single_task (gpuStream_t stream, L&& f) noexcept { - AMREX_LAUNCH_KERNEL(1, 1, 0, stream, + AMREX_LAUNCH_KERNEL(Gpu::Device::warp_size, 1, 1, 0, stream, [=] AMREX_GPU_DEVICE () noexcept {f();}); AMREX_GPU_ERROR_CHECK(); } +template +void launch (int nblocks, std::size_t shared_mem_bytes, gpuStream_t stream, + L&& f) noexcept +{ + AMREX_LAUNCH_KERNEL(MT, nblocks, MT, shared_mem_bytes, stream, + [=] AMREX_GPU_DEVICE () noexcept { f(); }); + AMREX_GPU_ERROR_CHECK(); +} + +template +void launch (int nblocks, gpuStream_t stream, L&& f) noexcept +{ + AMREX_LAUNCH_KERNEL(MT, nblocks, MT, 0, stream, + [=] AMREX_GPU_DEVICE () noexcept { f(); }); + AMREX_GPU_ERROR_CHECK(); +} + template void launch (int nblocks, int nthreads_per_block, std::size_t shared_mem_bytes, gpuStream_t stream, L&& f) noexcept { - AMREX_LAUNCH_KERNEL(nblocks, nthreads_per_block, shared_mem_bytes, + AMREX_ASSERT(nthreads_per_block <= AMREX_GPU_MAX_THREADS); + AMREX_LAUNCH_KERNEL(AMREX_GPU_MAX_THREADS, nblocks, nthreads_per_block, shared_mem_bytes, stream, [=] AMREX_GPU_DEVICE () noexcept { f(); }); AMREX_GPU_ERROR_CHECK(); } @@ -729,12 +760,12 @@ void launch (int nblocks, int nthreads_per_block, gpuStream_t stream, L&& f) noe launch(nblocks, nthreads_per_block, 0, stream, std::forward(f)); } -template +template void launch (T const& n, L&& f) noexcept { if (amrex::isEmpty(n)) return; - const auto ec = Gpu::ExecutionConfig(n); - AMREX_LAUNCH_KERNEL(ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), + const auto ec = Gpu::makeExecutionConfig(n); + AMREX_LAUNCH_KERNEL(MT, ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), [=] AMREX_GPU_DEVICE () noexcept { for (auto const i : Gpu::Range(n)) { f(i); @@ -793,13 +824,13 @@ namespace detail { } } -template ::value> > +template ::value> > std::enable_if_t::value> ParallelFor (Gpu::KernelInfo const&, T n, L&& f) noexcept { if (amrex::isEmpty(n)) return; - const auto ec = Gpu::ExecutionConfig(n); - AMREX_LAUNCH_KERNEL(ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), + const auto ec = Gpu::makeExecutionConfig(n); + AMREX_LAUNCH_KERNEL(MT, ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), [=] AMREX_GPU_DEVICE () noexcept { for (T i = blockDim.x*blockIdx.x+threadIdx.x, stride = blockDim.x*gridDim.x; i < n; i += stride) { @@ -809,7 +840,7 @@ ParallelFor (Gpu::KernelInfo const&, T n, L&& f) noexcept AMREX_GPU_ERROR_CHECK(); } -template +template std::enable_if_t::value> ParallelFor (Gpu::KernelInfo const&, Box const& box, L&& f) noexcept { @@ -819,8 +850,8 @@ ParallelFor (Gpu::KernelInfo const&, Box const& box, L&& f) noexcept const auto len = 
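// ============================================================================
// Sketch: every kernel body in these hunks is a grid-stride loop, so a launch
// of nthreads_total threads covers any ncells. Host analogue with the
// blockDim/blockIdx arithmetic collapsed into a single thread id:
#include <cassert>
#include <vector>

int main ()
{
    const int ncells = 1000, nthreads_total = 96;   // fewer threads than cells
    std::vector<int> visits(ncells, 0);
    for (int tid = 0; tid < nthreads_total; ++tid) {          // each "thread"
        for (int icell = tid; icell < ncells; icell += nthreads_total) {
            ++visits[icell];                                  // the body f(icell)
        }
    }
    for (int v : visits) { assert(v == 1); }   // full, non-overlapping coverage
}
// ============================================================================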
amrex::length(box); const auto lenxy = len.x*len.y; const auto lenx = len.x; - const auto ec = Gpu::ExecutionConfig(ncells); - AMREX_LAUNCH_KERNEL(ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), + const auto ec = Gpu::makeExecutionConfig(ncells); + AMREX_LAUNCH_KERNEL(MT, ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), [=] AMREX_GPU_DEVICE () noexcept { for (int icell = blockDim.x*blockIdx.x+threadIdx.x, stride = blockDim.x*gridDim.x; icell < ncells; icell += stride) @@ -837,7 +868,7 @@ ParallelFor (Gpu::KernelInfo const&, Box const& box, L&& f) noexcept AMREX_GPU_ERROR_CHECK(); } -template ::value> > +template ::value> > std::enable_if_t::value> ParallelFor (Gpu::KernelInfo const&, Box const& box, T ncomp, L&& f) noexcept { @@ -847,8 +878,8 @@ ParallelFor (Gpu::KernelInfo const&, Box const& box, T ncomp, L&& f) noexcept const auto len = amrex::length(box); const auto lenxy = len.x*len.y; const auto lenx = len.x; - const auto ec = Gpu::ExecutionConfig(ncells); - AMREX_LAUNCH_KERNEL(ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), + const auto ec = Gpu::makeExecutionConfig(ncells); + AMREX_LAUNCH_KERNEL(MT, ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), [=] AMREX_GPU_DEVICE () noexcept { for (int icell = blockDim.x*blockIdx.x+threadIdx.x, stride = blockDim.x*gridDim.x; icell < ncells; icell += stride) { @@ -871,7 +902,8 @@ ParallelForRNG (T n, L&& f) noexcept if (amrex::isEmpty(n)) return; randState_t* rand_state = getRandState(); const auto ec = Gpu::ExecutionConfig(n); - AMREX_LAUNCH_KERNEL(amrex::min(ec.numBlocks.x, Gpu::Device::maxBlocksPerLaunch()), + AMREX_LAUNCH_KERNEL(AMREX_GPU_MAX_THREADS, + amrex::min(ec.numBlocks.x, Gpu::Device::maxBlocksPerLaunch()), ec.numThreads, 0, Gpu::gpuStream(), [=] AMREX_GPU_DEVICE () noexcept { int tid = blockDim.x*blockIdx.x+threadIdx.x; @@ -896,7 +928,8 @@ ParallelForRNG (Box const& box, L&& f) noexcept const auto lenxy = len.x*len.y; const auto lenx = len.x; const auto ec = Gpu::ExecutionConfig(ncells); - AMREX_LAUNCH_KERNEL(amrex::min(ec.numBlocks.x, Gpu::Device::maxBlocksPerLaunch()), + AMREX_LAUNCH_KERNEL(AMREX_GPU_MAX_THREADS, + amrex::min(ec.numBlocks.x, Gpu::Device::maxBlocksPerLaunch()), ec.numThreads, 0, Gpu::gpuStream(), [=] AMREX_GPU_DEVICE () noexcept { int tid = blockDim.x*blockIdx.x+threadIdx.x; @@ -927,7 +960,8 @@ ParallelForRNG (Box const& box, T ncomp, L&& f) noexcept const auto lenxy = len.x*len.y; const auto lenx = len.x; const auto ec = Gpu::ExecutionConfig(ncells); - AMREX_LAUNCH_KERNEL(amrex::min(ec.numBlocks.x, Gpu::Device::maxBlocksPerLaunch()), + AMREX_LAUNCH_KERNEL(AMREX_GPU_MAX_THREADS, + amrex::min(ec.numBlocks.x, Gpu::Device::maxBlocksPerLaunch()), ec.numThreads, 0, Gpu::gpuStream(), [=] AMREX_GPU_DEVICE () noexcept { int tid = blockDim.x*blockIdx.x+threadIdx.x; @@ -948,7 +982,7 @@ ParallelForRNG (Box const& box, T ncomp, L&& f) noexcept AMREX_GPU_ERROR_CHECK(); } -template +template std::enable_if_t::value && MaybeDeviceRunnable::value> ParallelFor (Gpu::KernelInfo const&, Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept @@ -965,8 +999,8 @@ ParallelFor (Gpu::KernelInfo const&, const auto len2xy = len2.x*len2.y; const auto len1x = len1.x; const auto len2x = len2.x; - const auto ec = Gpu::ExecutionConfig(ncells); - AMREX_LAUNCH_KERNEL(ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), + const auto ec = Gpu::makeExecutionConfig(ncells); + AMREX_LAUNCH_KERNEL(MT, ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), [=] AMREX_GPU_DEVICE () noexcept { for (int icell = blockDim.x*blockIdx.x+threadIdx.x, stride = 
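// ============================================================================
// Sketch: why ParallelForRNG above keeps AMREX_GPU_MAX_THREADS and clamps the
// block count with maxBlocksPerLaunch(): each launched thread owns one
// persistent RNG state, and the grid-stride loop reuses those few states
// across all cells. Toy model; the LCG constants are illustrative, not
// amrex's RNG.
#include <cstdint>
#include <cstdio>
#include <vector>

int main ()
{
    const int nstates = 8, ncells = 20;            // few states, many cells
    std::vector<std::uint64_t> state(nstates, 12345u);
    for (int tid = 0; tid < nstates; ++tid) {
        for (int i = tid; i < ncells; i += nstates) {
            // advance this thread's private engine, then run f(i, engine)
            state[tid] = state[tid]*6364136223846793005ULL + 1442695040888963407ULL;
        }
    }
    std::printf("state[0] = %llu\n", (unsigned long long) state[0]);
}
// ============================================================================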
blockDim.x*gridDim.x; icell < ncells; icell += stride) { @@ -993,7 +1027,7 @@ ParallelFor (Gpu::KernelInfo const&, AMREX_GPU_ERROR_CHECK(); } -template +template std::enable_if_t::value && MaybeDeviceRunnable::value && MaybeDeviceRunnable::value> ParallelFor (Gpu::KernelInfo const&, Box const& box1, Box const& box2, Box const& box3, @@ -1016,8 +1050,8 @@ ParallelFor (Gpu::KernelInfo const&, const auto len1x = len1.x; const auto len2x = len2.x; const auto len3x = len3.x; - const auto ec = Gpu::ExecutionConfig(ncells); - AMREX_LAUNCH_KERNEL(ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), + const auto ec = Gpu::makeExecutionConfig(ncells); + AMREX_LAUNCH_KERNEL(MT, ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), [=] AMREX_GPU_DEVICE () noexcept { for (int icell = blockDim.x*blockIdx.x+threadIdx.x, stride = blockDim.x*gridDim.x; icell < ncells; icell += stride) { @@ -1053,7 +1087,7 @@ ParallelFor (Gpu::KernelInfo const&, AMREX_GPU_ERROR_CHECK(); } -template ::value>, typename M2=std::enable_if_t::value> > std::enable_if_t::value && MaybeDeviceRunnable::value> @@ -1073,8 +1107,8 @@ ParallelFor (Gpu::KernelInfo const&, const auto len2xy = len2.x*len2.y; const auto len1x = len1.x; const auto len2x = len2.x; - const auto ec = Gpu::ExecutionConfig(ncells); - AMREX_LAUNCH_KERNEL(ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), + const auto ec = Gpu::makeExecutionConfig(ncells); + AMREX_LAUNCH_KERNEL(MT, ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), [=] AMREX_GPU_DEVICE () noexcept { for (int icell = blockDim.x*blockIdx.x+threadIdx.x, stride = blockDim.x*gridDim.x; icell < ncells; icell += stride) { @@ -1105,7 +1139,7 @@ ParallelFor (Gpu::KernelInfo const&, AMREX_GPU_ERROR_CHECK(); } -template ::value>, typename M2=std::enable_if_t::value>, typename M3=std::enable_if_t::value> > @@ -1132,8 +1166,8 @@ ParallelFor (Gpu::KernelInfo const&, const auto len1x = len1.x; const auto len2x = len2.x; const auto len3x = len3.x; - const auto ec = Gpu::ExecutionConfig(ncells); - AMREX_LAUNCH_KERNEL(ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), + const auto ec = Gpu::makeExecutionConfig(ncells); + AMREX_LAUNCH_KERNEL(MT, ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), [=] AMREX_GPU_DEVICE () noexcept { for (int icell = blockDim.x*blockIdx.x+threadIdx.x, stride = blockDim.x*gridDim.x; icell < ncells; icell += stride) { @@ -1183,29 +1217,127 @@ void single_task (L&& f) noexcept single_task(Gpu::gpuStream(), std::forward(f)); } +template +void launch (T const& n, L&& f) noexcept +{ + launch(n, std::forward(f)); +} + +template ::value> > +std::enable_if_t::value> +ParallelFor (Gpu::KernelInfo const& info, T n, L&& f) noexcept +{ + ParallelFor(info, n, std::forward(f)); +} + +template +std::enable_if_t::value> +ParallelFor (Gpu::KernelInfo const& info, Box const& box, L&& f) noexcept +{ + ParallelFor(info, box, std::forward(f)); +} + +template ::value> > +std::enable_if_t::value> +ParallelFor (Gpu::KernelInfo const& info, Box const& box, T ncomp, L&& f) noexcept +{ + ParallelFor(info, box, ncomp, std::forward(f)); +} + +template +std::enable_if_t::value && MaybeDeviceRunnable::value> +ParallelFor (Gpu::KernelInfo const& info, + Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +{ + ParallelFor(info, box1, box2, std::forward(f1), + std::forward(f2)); +} + +template +std::enable_if_t::value && MaybeDeviceRunnable::value && MaybeDeviceRunnable::value> +ParallelFor (Gpu::KernelInfo const& info, + Box const& box1, Box const& box2, Box const& box3, + L1&& f1, L2&& f2, L3&& f3) noexcept +{ + 
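// ============================================================================
// Sketch: the fused two- and three-box kernels above size one launch for all
// boxes and let each thread run f1 and/or f2 depending on which cell counts
// its flat index still falls under. CPU rendering of that predication,
// assumed from the shared icell loop; n1/n2 stand for the boxes' cell counts.
#include <algorithm>
#include <cstdio>

int main ()
{
    const int n1 = 5, n2 = 9;
    for (int icell = 0; icell < std::max(n1, n2); ++icell) {
        if (icell < n1) { std::printf("f1(%d) ", icell); }
        if (icell < n2) { std::printf("f2(%d) ", icell); }
    }
    std::printf("\n");
}
// ============================================================================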
ParallelFor(info, box1, box2, box3, std::forward(f1), + std::forward(f2), std::forward(f3)); +} + +template ::value>, + typename M2=std::enable_if_t::value> > +std::enable_if_t::value && MaybeDeviceRunnable::value> +ParallelFor (Gpu::KernelInfo const& info, + Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2) noexcept +{ + ParallelFor(info, box1, ncomp1, std::forward(f1), + box2, ncomp2, std::forward(f2)); +} + +template ::value>, + typename M2=std::enable_if_t::value>, + typename M3=std::enable_if_t::value> > +std::enable_if_t::value && MaybeDeviceRunnable::value && MaybeDeviceRunnable::value> +ParallelFor (Gpu::KernelInfo const& info, + Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2, + Box const& box3, T3 ncomp3, L3&& f3) noexcept +{ + ParallelFor(info, box1, ncomp1, std::forward(f1), + box2, ncomp2, std::forward(f2), + box3, ncomp3, std::forward(f3)); +} + template ::value> > void For (Gpu::KernelInfo const& info, T n, L&& f) noexcept { - ParallelFor(info, n,std::forward(f)); + ParallelFor(info, n,std::forward(f)); +} + +template ::value> > +void For (Gpu::KernelInfo const& info, T n, L&& f) noexcept +{ + ParallelFor(info, n,std::forward(f)); } template void For (Gpu::KernelInfo const& info, Box const& box, L&& f) noexcept { - ParallelFor(info, box,std::forward(f)); + ParallelFor(info, box,std::forward(f)); +} + +template +void For (Gpu::KernelInfo const& info, Box const& box, L&& f) noexcept +{ + ParallelFor(info, box,std::forward(f)); } template ::value> > void For (Gpu::KernelInfo const& info, Box const& box, T ncomp, L&& f) noexcept { - ParallelFor(info,box,ncomp,std::forward(f)); + ParallelFor(info,box,ncomp,std::forward(f)); +} + +template ::value> > +void For (Gpu::KernelInfo const& info, Box const& box, T ncomp, L&& f) noexcept +{ + ParallelFor(info,box,ncomp,std::forward(f)); } template void For (Gpu::KernelInfo const& info, Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept { - ParallelFor(info,box1,box2,std::forward(f1),std::forward(f2)); + ParallelFor(info,box1,box2,std::forward(f1),std::forward(f2)); +} + +template +void For (Gpu::KernelInfo const& info, + Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +{ + ParallelFor(info,box1,box2,std::forward(f1),std::forward(f2)); } template @@ -1213,7 +1345,15 @@ void For (Gpu::KernelInfo const& info, Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { - ParallelFor(info,box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); + ParallelFor(info,box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); +} + +template +void For (Gpu::KernelInfo const& info, + Box const& box1, Box const& box2, Box const& box3, + L1&& f1, L2&& f2, L3&& f3) noexcept +{ + ParallelFor(info,box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); } template (f1),box2,ncomp2,std::forward(f2)); + ParallelFor(info,box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); +} + +template ::value>, + typename M2=std::enable_if_t::value> > +void For (Gpu::KernelInfo const& info, + Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2) noexcept +{ + ParallelFor(info,box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); } template (info, + box1,ncomp1,std::forward(f1), + box2,ncomp2,std::forward(f2), + box3,ncomp3,std::forward(f3)); +} + +template ::value>, + typename M2=std::enable_if_t::value>, + typename M3=std::enable_if_t::value> > +void For (Gpu::KernelInfo const& info, + Box const& box1, 
T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2, + Box const& box3, T3 ncomp3, L3&& f3) noexcept +{ + ParallelFor(info, box1,ncomp1,std::forward(f1), box2,ncomp2,std::forward(f2), box3,ncomp3,std::forward(f3)); @@ -1244,32 +1409,63 @@ void For (Gpu::KernelInfo const& info, template ::value> > void ParallelFor (T n, L&& f) noexcept { - ParallelFor(Gpu::KernelInfo{}, n, std::forward(f)); + ParallelFor(Gpu::KernelInfo{}, n, std::forward(f)); +} + +template ::value> > +void ParallelFor (T n, L&& f) noexcept +{ + ParallelFor(Gpu::KernelInfo{}, n, std::forward(f)); } template void ParallelFor (Box const& box, L&& f) noexcept { - ParallelFor(Gpu::KernelInfo{}, box, std::forward(f)); + ParallelFor(Gpu::KernelInfo{}, box, std::forward(f)); +} + +template +void ParallelFor (Box const& box, L&& f) noexcept +{ + ParallelFor(Gpu::KernelInfo{}, box, std::forward(f)); } template ::value> > void ParallelFor (Box const& box, T ncomp, L&& f) noexcept { - ParallelFor(Gpu::KernelInfo{},box,ncomp,std::forward(f)); + ParallelFor(Gpu::KernelInfo{},box,ncomp,std::forward(f)); +} + +template ::value> > +void ParallelFor (Box const& box, T ncomp, L&& f) noexcept +{ + ParallelFor(Gpu::KernelInfo{},box,ncomp,std::forward(f)); } template void ParallelFor (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept { - ParallelFor(Gpu::KernelInfo{},box1,box2,std::forward(f1),std::forward(f2)); + ParallelFor(Gpu::KernelInfo{},box1,box2,std::forward(f1),std::forward(f2)); +} + +template +void ParallelFor (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +{ + ParallelFor(Gpu::KernelInfo{},box1,box2,std::forward(f1),std::forward(f2)); } template void ParallelFor (Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { - ParallelFor(Gpu::KernelInfo{},box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); + ParallelFor(Gpu::KernelInfo{},box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); +} + +template +void ParallelFor (Box const& box1, Box const& box2, Box const& box3, + L1&& f1, L2&& f2, L3&& f3) noexcept +{ + ParallelFor(Gpu::KernelInfo{},box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); } template (f1),box2,ncomp2,std::forward(f2)); + ParallelFor(Gpu::KernelInfo{},box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); +} + +template ::value>, + typename M2=std::enable_if_t::value> > +void ParallelFor (Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2) noexcept +{ + ParallelFor(Gpu::KernelInfo{},box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); } template (Gpu::KernelInfo{}, + box1,ncomp1,std::forward(f1), + box2,ncomp2,std::forward(f2), + box3,ncomp3,std::forward(f3)); +} + +template ::value>, + typename M2=std::enable_if_t::value>, + typename M3=std::enable_if_t::value> > +void ParallelFor (Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2, + Box const& box3, T3 ncomp3, L3&& f3) noexcept +{ + ParallelFor(Gpu::KernelInfo{}, box1,ncomp1,std::forward(f1), box2,ncomp2,std::forward(f2), box3,ncomp3,std::forward(f3)); @@ -1298,32 +1517,63 @@ void ParallelFor (Box const& box1, T1 ncomp1, L1&& f1, template ::value> > void For (T n, L&& f) noexcept { - ParallelFor(Gpu::KernelInfo{}, n,std::forward(f)); + ParallelFor(Gpu::KernelInfo{}, n,std::forward(f)); +} + +template ::value> > +void For (T n, L&& f) noexcept +{ + ParallelFor(Gpu::KernelInfo{}, n,std::forward(f)); } template void For (Box const& box, L&& f) noexcept { - ParallelFor(Gpu::KernelInfo{}, 
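// ============================================================================
// Sketch: the '-/+' pairs above keep every legacy signature and reroute its
// body through the MT form with the compile-time default, so old call sites
// compile unchanged. Stand-alone analogue; DEFAULT_MT stands in for
// AMREX_GPU_MAX_THREADS.
#include <cstdio>
#include <utility>

constexpr int DEFAULT_MT = 256;

template <int MT, typename L>
void ParallelFor (int n, L&& f)
{
    std::printf("launch with max %d threads/block\n", MT);
    for (int i = 0; i < n; ++i) { f(i); }
}

template <typename L>
void ParallelFor (int n, L&& f)   // legacy entry point
{
    ParallelFor<DEFAULT_MT>(n, std::forward<L>(f));
}

int main ()
{
    ParallelFor(2, [] (int) {});        // old call site, new default
    ParallelFor<128>(2, [] (int) {});   // explicit hint
}
// ============================================================================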
box,std::forward(f)); + ParallelFor(Gpu::KernelInfo{}, box,std::forward(f)); +} + +template +void For (Box const& box, L&& f) noexcept +{ + ParallelFor(Gpu::KernelInfo{}, box,std::forward(f)); } template ::value> > void For (Box const& box, T ncomp, L&& f) noexcept { - ParallelFor(Gpu::KernelInfo{},box,ncomp,std::forward(f)); + ParallelFor(Gpu::KernelInfo{},box,ncomp,std::forward(f)); +} + +template ::value> > +void For (Box const& box, T ncomp, L&& f) noexcept +{ + ParallelFor(Gpu::KernelInfo{},box,ncomp,std::forward(f)); } template void For (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept { - ParallelFor(Gpu::KernelInfo{},box1,box2,std::forward(f1),std::forward(f2)); + ParallelFor(Gpu::KernelInfo{},box1,box2,std::forward(f1),std::forward(f2)); +} + +template +void For (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +{ + ParallelFor(Gpu::KernelInfo{},box1,box2,std::forward(f1),std::forward(f2)); } template void For (Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { - ParallelFor(Gpu::KernelInfo{},box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); + ParallelFor(Gpu::KernelInfo{},box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); +} + +template +void For (Box const& box1, Box const& box2, Box const& box3, + L1&& f1, L2&& f2, L3&& f3) noexcept +{ + ParallelFor(Gpu::KernelInfo{},box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); } template (f1),box2,ncomp2,std::forward(f2)); + ParallelFor(Gpu::KernelInfo{},box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); +} + +template ::value>, + typename M2=std::enable_if_t::value> > +void For (Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2) noexcept +{ + ParallelFor(Gpu::KernelInfo{},box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); } template (Gpu::KernelInfo{}, + box1,ncomp1,std::forward(f1), + box2,ncomp2,std::forward(f2), + box3,ncomp3,std::forward(f3)); +} + +template ::value>, + typename M2=std::enable_if_t::value>, + typename M3=std::enable_if_t::value> > +void For (Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2, + Box const& box3, T3 ncomp3, L3&& f3) noexcept +{ + ParallelFor(Gpu::KernelInfo{}, box1,ncomp1,std::forward(f1), box2,ncomp2,std::forward(f2), box3,ncomp3,std::forward(f3)); @@ -1354,10 +1627,30 @@ std::enable_if_t::value> HostDeviceParallelFor (Gpu::KernelInfo const& info, T n, L&& f) noexcept { if (Gpu::inLaunchRegion()) { - ParallelFor(info,n,std::forward(f)); + ParallelFor(info,n,std::forward(f)); } else { +#ifdef AMREX_USE_DPCPP + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); +#else AMREX_PRAGMA_SIMD for (T i = 0; i < n; ++i) f(i); +#endif + } +} + +template ::value> > +std::enable_if_t::value> +HostDeviceParallelFor (Gpu::KernelInfo const& info, T n, L&& f) noexcept +{ + if (Gpu::inLaunchRegion()) { + ParallelFor(info,n,std::forward(f)); + } else { +#ifdef AMREX_USE_DPCPP + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. 
It takes too long to compile"); +#else + AMREX_PRAGMA_SIMD + for (T i = 0; i < n; ++i) f(i); +#endif } } @@ -1365,7 +1658,14 @@ template ::value> HostDeviceParallelFor (T n, L&& f) noexcept { - HostDeviceParallelFor(Gpu::KernelInfo{}, n, std::forward(f)); + HostDeviceParallelFor(Gpu::KernelInfo{}, n, std::forward(f)); +} + +template ::value> > +std::enable_if_t::value> +HostDeviceParallelFor (T n, L&& f) noexcept +{ + HostDeviceParallelFor(Gpu::KernelInfo{}, n, std::forward(f)); } template @@ -1373,9 +1673,28 @@ std::enable_if_t::value> HostDeviceParallelFor (Gpu::KernelInfo const& info, Box const& box, L&& f) noexcept { if (Gpu::inLaunchRegion()) { - ParallelFor(info, box,std::forward(f)); + ParallelFor(info, box,std::forward(f)); } else { +#ifdef AMREX_USE_DPCPP + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); +#else LoopConcurrentOnCpu(box,std::forward(f)); +#endif + } +} + +template +std::enable_if_t::value> +HostDeviceParallelFor (Gpu::KernelInfo const& info, Box const& box, L&& f) noexcept +{ + if (Gpu::inLaunchRegion()) { + ParallelFor(info, box,std::forward(f)); + } else { +#ifdef AMREX_USE_DPCPP + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); +#else + LoopConcurrentOnCpu(box,std::forward(f)); +#endif } } @@ -1384,9 +1703,28 @@ std::enable_if_t::value> HostDeviceParallelFor (Gpu::KernelInfo const& info, Box const& box, T ncomp, L&& f) noexcept { if (Gpu::inLaunchRegion()) { - ParallelFor(info, box,ncomp,std::forward(f)); + ParallelFor(info, box,ncomp,std::forward(f)); } else { +#ifdef AMREX_USE_DPCPP + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); +#else LoopConcurrentOnCpu(box,ncomp,std::forward(f)); +#endif + } +} + +template ::value> > +std::enable_if_t::value> +HostDeviceParallelFor (Gpu::KernelInfo const& info, Box const& box, T ncomp, L&& f) noexcept +{ + if (Gpu::inLaunchRegion()) { + ParallelFor(info, box,ncomp,std::forward(f)); + } else { +#ifdef AMREX_USE_DPCPP + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); +#else + LoopConcurrentOnCpu(box,ncomp,std::forward(f)); +#endif } } @@ -1396,26 +1734,51 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info, Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept { if (Gpu::inLaunchRegion()) { - ParallelFor(info,box1,box2,std::forward(f1),std::forward(f2)); + ParallelFor(info,box1,box2,std::forward(f1),std::forward(f2)); } else { +#ifdef AMREX_USE_DPCPP + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); +#else LoopConcurrentOnCpu(box1,std::forward(f1)); LoopConcurrentOnCpu(box2,std::forward(f2)); +#endif } } -template +template +std::enable_if_t::value && MaybeHostDeviceRunnable::value> +HostDeviceParallelFor (Gpu::KernelInfo const& info, + Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +{ + if (Gpu::inLaunchRegion()) { + ParallelFor(info,box1,box2,std::forward(f1),std::forward(f2)); + } else { +#ifdef AMREX_USE_DPCPP + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. 
It takes too long to compile"); +#else + LoopConcurrentOnCpu(box1,std::forward(f1)); + LoopConcurrentOnCpu(box2,std::forward(f2)); +#endif + } +} + +template std::enable_if_t::value && MaybeHostDeviceRunnable::value && MaybeHostDeviceRunnable::value> HostDeviceParallelFor (Gpu::KernelInfo const& info, Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { if (Gpu::inLaunchRegion()) { - ParallelFor(info,box1,box2,box3, + ParallelFor(info,box1,box2,box3, std::forward(f1),std::forward(f2),std::forward(f3)); } else { +#ifdef AMREX_USE_DPCPP + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); +#else LoopConcurrentOnCpu(box1,std::forward(f1)); LoopConcurrentOnCpu(box2,std::forward(f2)); LoopConcurrentOnCpu(box3,std::forward(f3)); +#endif } } @@ -1428,10 +1791,34 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info, Box const& box2, T2 ncomp2, L2&& f2) noexcept { if (Gpu::inLaunchRegion()) { - ParallelFor(info,box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); + ParallelFor(info,box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); } else { +#ifdef AMREX_USE_DPCPP + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); +#else + LoopConcurrentOnCpu(box1,ncomp1,std::forward(f1)); + LoopConcurrentOnCpu(box2,ncomp2,std::forward(f2)); +#endif + } +} + +template ::value>, + typename M2=std::enable_if_t::value> > +std::enable_if_t::value && MaybeHostDeviceRunnable::value> +HostDeviceParallelFor (Gpu::KernelInfo const& info, + Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2) noexcept +{ + if (Gpu::inLaunchRegion()) { + ParallelFor(info,box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); + } else { +#ifdef AMREX_USE_DPCPP + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); +#else LoopConcurrentOnCpu(box1,ncomp1,std::forward(f1)); LoopConcurrentOnCpu(box2,ncomp2,std::forward(f2)); +#endif } } @@ -1446,40 +1833,95 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info, Box const& box3, T3 ncomp3, L3&& f3) noexcept { if (Gpu::inLaunchRegion()) { - ParallelFor(info, + ParallelFor(info, box1,ncomp1,std::forward(f1), box2,ncomp2,std::forward(f2), box3,ncomp3,std::forward(f3)); } else { +#ifdef AMREX_USE_DPCPP + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); +#else LoopConcurrentOnCpu(box1,ncomp1,std::forward(f1)); LoopConcurrentOnCpu(box2,ncomp2,std::forward(f2)); LoopConcurrentOnCpu(box3,ncomp3,std::forward(f3)); +#endif + } +} + +template ::value>, + typename M2=std::enable_if_t::value>, + typename M3=std::enable_if_t::value> > +std::enable_if_t::value && MaybeHostDeviceRunnable::value && MaybeHostDeviceRunnable::value> +HostDeviceParallelFor (Gpu::KernelInfo const& info, + Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2, + Box const& box3, T3 ncomp3, L3&& f3) noexcept +{ + if (Gpu::inLaunchRegion()) { + ParallelFor(info, + box1,ncomp1,std::forward(f1), + box2,ncomp2,std::forward(f2), + box3,ncomp3,std::forward(f3)); + } else { +#ifdef AMREX_USE_DPCPP + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. 
It takes too long to compile"); +#else + LoopConcurrentOnCpu(box1,ncomp1,std::forward(f1)); + LoopConcurrentOnCpu(box2,ncomp2,std::forward(f2)); + LoopConcurrentOnCpu(box3,ncomp3,std::forward(f3)); +#endif } } template ::value> > void HostDeviceFor (Gpu::KernelInfo const& info, T n, L&& f) noexcept { - HostDeviceParallelFor(info,n,std::forward(f)); + HostDeviceParallelFor(info,n,std::forward(f)); +} + +template ::value> > +void HostDeviceFor (Gpu::KernelInfo const& info, T n, L&& f) noexcept +{ + HostDeviceParallelFor(info,n,std::forward(f)); } template void HostDeviceFor (Gpu::KernelInfo const& info, Box const& box, L&& f) noexcept { - HostDeviceParallelFor(info,box,std::forward(f)); + HostDeviceParallelFor(info,box,std::forward(f)); +} + +template +void HostDeviceFor (Gpu::KernelInfo const& info, Box const& box, L&& f) noexcept +{ + HostDeviceParallelFor(info,box,std::forward(f)); } template ::value> > void HostDeviceFor (Gpu::KernelInfo const& info, Box const& box, T ncomp, L&& f) noexcept { - HostDeviceParallelFor(info,box,ncomp,std::forward(f)); + HostDeviceParallelFor(info,box,ncomp,std::forward(f)); +} + +template ::value> > +void HostDeviceFor (Gpu::KernelInfo const& info, Box const& box, T ncomp, L&& f) noexcept +{ + HostDeviceParallelFor(info,box,ncomp,std::forward(f)); } template void HostDeviceFor (Gpu::KernelInfo const& info, Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept { - HostDeviceParallelFor(info,box1,box2,std::forward(f1),std::forward(f2)); + HostDeviceParallelFor(info,box1,box2,std::forward(f1),std::forward(f2)); +} + +template +void HostDeviceFor (Gpu::KernelInfo const& info, + Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +{ + HostDeviceParallelFor(info,box1,box2,std::forward(f1),std::forward(f2)); } template @@ -1487,7 +1929,16 @@ void HostDeviceFor (Gpu::KernelInfo const& info, Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { - HostDeviceParallelFor(info, box1,box2,box3, + HostDeviceParallelFor(info, box1,box2,box3, + std::forward(f1),std::forward(f2),std::forward(f3)); +} + +template +void HostDeviceFor (Gpu::KernelInfo const& info, + Box const& box1, Box const& box2, Box const& box3, + L1&& f1, L2&& f2, L3&& f3) noexcept +{ + HostDeviceParallelFor(info, box1,box2,box3, std::forward(f1),std::forward(f2),std::forward(f3)); } @@ -1498,7 +1949,17 @@ void HostDeviceFor (Gpu::KernelInfo const& info, Box const& box1, T1 ncomp1, L1&& f1, Box const& box2, T2 ncomp2, L2&& f2) noexcept { - HostDeviceParallelFor(info,box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); + HostDeviceParallelFor(info,box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); +} + +template ::value>, + typename M2=std::enable_if_t::value> > +void HostDeviceFor (Gpu::KernelInfo const& info, + Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2) noexcept +{ + HostDeviceParallelFor(info,box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); } template (info, + box1,ncomp1,std::forward(f1), + box2,ncomp2,std::forward(f2), + box3,ncomp3,std::forward(f3)); +} + +template ::value>, + typename M2=std::enable_if_t::value>, + typename M3=std::enable_if_t::value> > +void HostDeviceFor (Gpu::KernelInfo const& info, + Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2, + Box const& box3, T3 ncomp3, L3&& f3) noexcept +{ + HostDeviceParallelFor(info, box1,ncomp1,std::forward(f1), box2,ncomp2,std::forward(f2), box3,ncomp3,std::forward(f3)); @@ -1519,32 +1995,64 @@ void HostDeviceFor 
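// ============================================================================
// Sketch: the HostDeviceParallelFor/HostDeviceFor family branches at run
// time: device path inside a launch region, otherwise a plain SIMD-hinted CPU
// loop (and, under AMREX_USE_DPCPP, an Abort instead of the host fallback).
// Reduced analogue; inLaunchRegion/deviceLoop are stand-ins.
#include <cstdio>
#include <utility>

namespace sketch {
    bool inLaunchRegion () { return false; }   // flipped by the runtime in amrex

    template <typename L>
    void deviceLoop (int n, L&& f) { for (int i = 0; i < n; ++i) { f(i); } }

    template <typename L>
    void HostDeviceParallelFor (int n, L&& f)
    {
        if (inLaunchRegion()) {
            deviceLoop(n, std::forward<L>(f));   // ParallelFor<MT> in amrex
        } else {
            // AMREX_PRAGMA_SIMD in the real header
            for (int i = 0; i < n; ++i) { f(i); }
        }
    }
}

int main ()
{
    sketch::HostDeviceParallelFor(3, [] (int i) { std::printf("%d\n", i); });
}
// ============================================================================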
(Gpu::KernelInfo const& info, template ::value> > void HostDeviceParallelFor (T n, L&& f) noexcept { - HostDeviceParallelFor(Gpu::KernelInfo{},n,std::forward(f)); + HostDeviceParallelFor(Gpu::KernelInfo{},n,std::forward(f)); +} + +template ::value> > +void HostDeviceParallelFor (T n, L&& f) noexcept +{ + HostDeviceParallelFor(Gpu::KernelInfo{},n,std::forward(f)); } template void HostDeviceParallelFor (Box const& box, L&& f) noexcept { - HostDeviceParallelFor(Gpu::KernelInfo{},box,std::forward(f)); + HostDeviceParallelFor(Gpu::KernelInfo{},box,std::forward(f)); +} + +template +void HostDeviceParallelFor (Box const& box, L&& f) noexcept +{ + HostDeviceParallelFor(Gpu::KernelInfo{},box,std::forward(f)); } template ::value> > void HostDeviceParallelFor (Box const& box, T ncomp, L&& f) noexcept { - HostDeviceParallelFor(Gpu::KernelInfo{},box,ncomp,std::forward(f)); + HostDeviceParallelFor(Gpu::KernelInfo{},box,ncomp,std::forward(f)); +} + +template ::value> > +void HostDeviceParallelFor (Box const& box, T ncomp, L&& f) noexcept +{ + HostDeviceParallelFor(Gpu::KernelInfo{},box,ncomp,std::forward(f)); } template void HostDeviceParallelFor (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept { - HostDeviceParallelFor(Gpu::KernelInfo{},box1,box2,std::forward(f1),std::forward(f2)); + HostDeviceParallelFor(Gpu::KernelInfo{},box1,box2,std::forward(f1),std::forward(f2)); +} + +template +void HostDeviceParallelFor (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +{ + HostDeviceParallelFor(Gpu::KernelInfo{},box1,box2,std::forward(f1),std::forward(f2)); } template void HostDeviceParallelFor (Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { - HostDeviceParallelFor(Gpu::KernelInfo{}, box1,box2,box3, + HostDeviceParallelFor(Gpu::KernelInfo{}, box1,box2,box3, + std::forward(f1),std::forward(f2),std::forward(f3)); +} + +template +void HostDeviceParallelFor (Box const& box1, Box const& box2, Box const& box3, + L1&& f1, L2&& f2, L3&& f3) noexcept +{ + HostDeviceParallelFor(Gpu::KernelInfo{}, box1,box2,box3, std::forward(f1),std::forward(f2),std::forward(f3)); } @@ -1554,7 +2062,16 @@ template (f1),box2,ncomp2,std::forward(f2)); + HostDeviceParallelFor(Gpu::KernelInfo{},box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); +} + +template ::value>, + typename M2=std::enable_if_t::value> > +void HostDeviceParallelFor (Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2) noexcept +{ + HostDeviceParallelFor(Gpu::KernelInfo{},box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); } template (Gpu::KernelInfo{}, + box1,ncomp1,std::forward(f1), + box2,ncomp2,std::forward(f2), + box3,ncomp3,std::forward(f3)); +} + +template ::value>, + typename M2=std::enable_if_t::value>, + typename M3=std::enable_if_t::value> > +void HostDeviceParallelFor (Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2, + Box const& box3, T3 ncomp3, L3&& f3) noexcept +{ + HostDeviceParallelFor(Gpu::KernelInfo{}, box1,ncomp1,std::forward(f1), box2,ncomp2,std::forward(f2), box3,ncomp3,std::forward(f3)); diff --git a/Src/Base/AMReX_GpuLaunchMacrosG.H b/Src/Base/AMReX_GpuLaunchMacrosG.H index 89aa1f24bc9..e1c643454bc 100644 --- a/Src/Base/AMReX_GpuLaunchMacrosG.H +++ b/Src/Base/AMReX_GpuLaunchMacrosG.H @@ -29,10 +29,16 @@ } \ } \ else { \ + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. 
It takes too long to compile"); \ + }}} + +#if 0 for (auto const TI : amrex::Gpu::Range(amrex_i_tn)) { \ block \ } \ }}} +#endif + #else #define AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE(TN,TI,block) \ { auto const& amrex_i_tn = TN; \ @@ -40,7 +46,7 @@ if (amrex::Gpu::inLaunchRegion()) \ { \ const auto amrex_i_ec = amrex::Gpu::ExecutionConfig(amrex_i_tn); \ - AMREX_LAUNCH_KERNEL(amrex_i_ec.numBlocks, amrex_i_ec.numThreads, amrex_i_ec.sharedMem, amrex::Gpu::gpuStream(), \ + AMREX_LAUNCH_KERNEL(AMREX_GPU_MAX_THREADS, amrex_i_ec.numBlocks, amrex_i_ec.numThreads, amrex_i_ec.sharedMem, amrex::Gpu::gpuStream(), \ [=] AMREX_GPU_DEVICE () noexcept { \ for (auto const TI : amrex::Gpu::Range(amrex_i_tn)) { \ block \ @@ -93,6 +99,10 @@ } \ } \ else { \ + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \ + }}} + +#if 0 for (auto const TI1 : amrex::Gpu::Range(amrex_i_tn1)) { \ block1 \ } \ @@ -100,6 +110,8 @@ block2 \ } \ }}} +#endif + #else #define AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_2(TN1,TI1,block1,TN2,TI2,block2) \ { auto const& amrex_i_tn1 = TN1; auto const& amrex_i_tn2 = TN2; \ @@ -111,7 +123,7 @@ dim3 amrex_i_nblocks = amrex::max(amrex_i_ec1.numBlocks.x, \ amrex_i_ec2.numBlocks.x); \ amrex_i_nblocks.y = 2; \ - AMREX_LAUNCH_KERNEL(amrex_i_nblocks, amrex_i_ec1.numThreads, 0, amrex::Gpu::gpuStream(), \ + AMREX_LAUNCH_KERNEL(AMREX_GPU_MAX_THREADS, amrex_i_nblocks, amrex_i_ec1.numThreads, 0, amrex::Gpu::gpuStream(), \ [=] AMREX_GPU_DEVICE () noexcept { \ switch (blockIdx.y) { \ case 0: for (auto const TI1 : amrex::Gpu::Range(amrex_i_tn1)) { \ @@ -179,6 +191,10 @@ } \ } \ else { \ + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \ + }}} + +#if 0 for (auto const TI1 : amrex::Gpu::Range(amrex_i_tn1)) { \ block1 \ } \ @@ -189,6 +205,8 @@ block3 \ } \ }}} +#endif + #else #define AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_3(TN1,TI1,block1,TN2,TI2,block2,TN3,TI3,block3) \ { auto const& amrex_i_tn1 = TN1; auto const& amrex_i_tn2 = TN2; auto const& amrex_i_tn3 = TN3; \ @@ -202,7 +220,7 @@ amrex_i_ec2.numBlocks.x), \ amrex_i_ec3.numBlocks.x); \ amrex_i_nblocks.y = 3; \ - AMREX_LAUNCH_KERNEL(amrex_i_nblocks, amrex_i_ec1.numThreads, 0, amrex::Gpu::gpuStream(), \ + AMREX_LAUNCH_KERNEL(AMREX_GPU_MAX_THREADS, amrex_i_nblocks, amrex_i_ec1.numThreads, 0, amrex::Gpu::gpuStream(), \ [=] AMREX_GPU_DEVICE () noexcept { \ switch (blockIdx.y) { \ case 0: for (auto const TI1 : amrex::Gpu::Range(amrex_i_tn1)) { \ @@ -269,7 +287,7 @@ if (amrex::Gpu::inLaunchRegion()) \ { \ auto amrex_i_ec = amrex::Gpu::ExecutionConfig(amrex_i_tn); \ - AMREX_LAUNCH_KERNEL(amrex_i_ec.numBlocks, amrex_i_ec.numThreads, amrex_i_ec.sharedMem, amrex::Gpu::gpuStream(), \ + AMREX_LAUNCH_KERNEL(AMREX_GPU_MAX_THREADS, amrex_i_ec.numBlocks, amrex_i_ec.numThreads, amrex_i_ec.sharedMem, amrex::Gpu::gpuStream(), \ [=] AMREX_GPU_DEVICE () noexcept { \ for (auto const TI : amrex::Gpu::Range(amrex_i_tn)) { \ block \ @@ -333,7 +351,7 @@ dim3 amrex_i_nblocks = amrex::max(amrex_i_ec1.numBlocks.x, \ amrex_i_ec2.numBlocks.x); \ amrex_i_nblocks.y = 2; \ - AMREX_LAUNCH_KERNEL(amrex_i_nblocks, amrex_i_ec1.numThreads, 0, amrex::Gpu::gpuStream(), \ + AMREX_LAUNCH_KERNEL(AMREX_GPU_MAX_THREADS, amrex_i_nblocks, amrex_i_ec1.numThreads, 0, amrex::Gpu::gpuStream(), \ [=] AMREX_GPU_DEVICE () noexcept { \ switch (blockIdx.y) { \ case 0: for (auto const TI1 : amrex::Gpu::Range(amrex_i_tn1)) { \ @@ -410,7 +428,7 @@ amrex_i_ec2.numBlocks.x), \ amrex_i_ec3.numBlocks.x); \ amrex_i_nblocks.y = 3; \ - 
AMREX_LAUNCH_KERNEL(amrex_i_nblocks, amrex_i_ec1.numThreads, 0, amrex::Gpu::gpuStream(), \ + AMREX_LAUNCH_KERNEL(AMREX_GPU_MAX_THREADS, amrex_i_nblocks, amrex_i_ec1.numThreads, 0, amrex::Gpu::gpuStream(), \ [=] AMREX_GPU_DEVICE () noexcept { \ switch (blockIdx.y) { \ case 0: for (auto const TI1 : amrex::Gpu::Range(amrex_i_tn1)) { \ @@ -434,6 +452,18 @@ // FOR_1D +#ifdef AMREX_USE_DPCPP +#define AMREX_GPU_HOST_DEVICE_FOR_1D(n,i,block) \ +{ \ + auto const& amrex_i_n = n; \ + using amrex_i_inttype = typename std::remove_const::type; \ + if (amrex::Gpu::inLaunchRegion()) { \ + amrex::ParallelFor(amrex_i_n,[=] AMREX_GPU_DEVICE (amrex_i_inttype i) noexcept block); \ + } else { \ + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \ + } \ +} +#else #define AMREX_GPU_HOST_DEVICE_FOR_1D(n,i,block) \ { \ auto const& amrex_i_n = n; \ @@ -446,6 +476,7 @@ for (amrex_i_inttype i = 0; i < amrex_i_n; ++i) amrex_i_lambda(i); \ } \ } +#endif #define AMREX_GPU_DEVICE_FOR_1D(n,i,block) \ { \ @@ -455,6 +486,17 @@ // FOR_3D +#ifdef AMREX_USE_DPCPP +#define AMREX_GPU_HOST_DEVICE_FOR_3D(box,i,j,k,block) \ +{ \ + auto const& amrex_i_box = box; \ + if (amrex::Gpu::inLaunchRegion()) { \ + amrex::ParallelFor(amrex_i_box,[=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept block); \ + } else { \ + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \ + } \ +} +#else #define AMREX_GPU_HOST_DEVICE_FOR_3D(box,i,j,k,block) \ { \ auto const& amrex_i_box = box; \ @@ -464,6 +506,7 @@ amrex::LoopConcurrentOnCpu(amrex_i_box,[=] (int i, int j, int k) noexcept block); \ } \ } +#endif #define AMREX_GPU_DEVICE_FOR_3D(box,i,j,k,block) \ { \ @@ -472,6 +515,18 @@ // FOR_4D +#ifdef AMREX_USE_DPCPP +#define AMREX_GPU_HOST_DEVICE_FOR_4D(box,ncomp,i,j,k,n,block) \ +{ \ + auto const& amrex_i_box = box; \ + auto const& amrex_i_ncomp = ncomp; \ + if (amrex::Gpu::inLaunchRegion()) { \ + amrex::ParallelFor(amrex_i_box,amrex_i_ncomp,[=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept block); \ + } else { \ + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. 
It takes too long to compile"); \ + } \ +} +#else #define AMREX_GPU_HOST_DEVICE_FOR_4D(box,ncomp,i,j,k,n,block) \ { \ auto const& amrex_i_box = box; \ @@ -482,6 +537,7 @@ amrex::LoopConcurrentOnCpu(amrex_i_box,amrex_i_ncomp,[=] (int i, int j, int k, int n) noexcept block); \ } \ } +#endif #define AMREX_GPU_DEVICE_FOR_4D(box,ncomp,i,j,k,n,block) \ { \ diff --git a/Src/Base/AMReX_GpuQualifiers.H b/Src/Base/AMReX_GpuQualifiers.H index ce07a3e52c2..b5d5ea58fbd 100644 --- a/Src/Base/AMReX_GpuQualifiers.H +++ b/Src/Base/AMReX_GpuQualifiers.H @@ -41,10 +41,6 @@ # include -namespace amrex { - namespace oneapi = sycl::ext::oneapi; -} - # define AMREX_REQUIRE_SUBGROUP_SIZE(x) \ _Pragma("clang diagnostic push") \ _Pragma("clang diagnostic ignored \"-Wattributes\"") \ diff --git a/Src/Base/AMReX_GpuReduce.H b/Src/Base/AMReX_GpuReduce.H index 9b48138940c..7b9b0e42355 100644 --- a/Src/Base/AMReX_GpuReduce.H +++ b/Src/Base/AMReX_GpuReduce.H @@ -8,6 +8,7 @@ #include #include #include +#include #if !defined(AMREX_USE_CUB) && defined(AMREX_USE_CUDA) && defined(__CUDACC__) && (__CUDACC_VER_MAJOR__ >= 11) #define AMREX_USE_CUB 1 @@ -54,10 +55,10 @@ template struct warpReduce { AMREX_GPU_DEVICE AMREX_FORCE_INLINE - T operator() (T x, amrex::oneapi::sub_group const& sg) const noexcept + T operator() (T x, sycl::sub_group const& sg) const noexcept { for (int offset = warpSize/2; offset > 0; offset /= 2) { - T y = sg.shuffle_down(x, offset); + T y = sycl::shift_group_left(sg, x, offset); x = F()(x,y); } return x; @@ -70,7 +71,7 @@ T blockReduce (T x, WARPREDUCE && warp_reduce, T x0, Gpu::Handler const& h) { T* shared = (T*)h.local; int tid = h.item->get_local_id(0); - amrex::oneapi::sub_group const& sg = h.item->get_sub_group(); + sycl::sub_group const& sg = h.item->get_sub_group(); int lane = sg.get_local_id()[0]; int wid = sg.get_group_id()[0]; int numwarps = sg.get_group_range()[0]; @@ -93,7 +94,7 @@ AMREX_GPU_DEVICE AMREX_FORCE_INLINE void blockReduce_partial (T* dest, T x, WARPREDUCE && warp_reduce, ATOMICOP && atomic_op, Gpu::Handler const& handler) { - amrex::oneapi::sub_group const& sg = handler.item->get_sub_group(); + sycl::sub_group const& sg = handler.item->get_sub_group(); int wid = sg.get_group_id()[0]; if ((wid+1)*warpSize <= handler.numActiveThreads) { x = warp_reduce(x, sg); // full warp @@ -249,15 +250,54 @@ void deviceReduceLogicalOr (int * dest, int source, Gpu::Handler const& h) noexc #elif defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP) +namespace detail { + +template +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +T shuffle_down (T x, int offset) noexcept +{ + return AMREX_HIP_OR_CUDA(__shfl_down(x, offset), + __shfl_down_sync(0xffffffff, x, offset)); +} + +// If other sizeof is needed, we can implement it later. +template = 0> +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +T multi_shuffle_down (T x, int offset) noexcept +{ + constexpr int nwords = (sizeof(T) + sizeof(unsigned int) - 1) / sizeof(unsigned int); + T y; + auto py = reinterpret_cast(&y); + auto px = reinterpret_cast(&x); + for (int i = 0; i < nwords; ++i) { + py[i] = shuffle_down(px[i],offset); + } + return y; +} + +} + template struct warpReduce { + // Not all arithmetic types can be taken by shuffle_down, but it's good enough. 
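// ============================================================================
// Sketch: both the SYCL path (sycl::shift_group_left) and the CUDA/HIP path
// (detail::shuffle_down) implement the same butterfly reduction: halve the
// offset each round, each lane combining its value with the lane `offset`
// above it. Host emulation over an 8-lane "warp" with F = plus:
#include <array>
#include <cstdio>

int main ()
{
    constexpr int warpSize = 8;
    std::array<int,warpSize> lane = {1,2,3,4,5,6,7,8};
    for (int offset = warpSize/2; offset > 0; offset /= 2) {
        for (int i = 0; i < warpSize; ++i) {
            // out-of-range shuffles return the lane's own value
            int y = (i + offset < warpSize) ? lane[i + offset] : lane[i];
            lane[i] += y;
        }
    }
    std::printf("lane 0 holds the warp sum: %d\n", lane[0]);   // 36
}
// ============================================================================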
+ template ::value,int> = 0> + AMREX_GPU_DEVICE AMREX_FORCE_INLINE + T operator() (T x) const noexcept + { + for (int offset = warpSize/2; offset > 0; offset /= 2) { + T y = detail::shuffle_down(x, offset); + x = F()(x,y); + } + return x; + } + + template ::value,int> = 0> AMREX_GPU_DEVICE AMREX_FORCE_INLINE T operator() (T x) const noexcept { for (int offset = warpSize/2; offset > 0; offset /= 2) { - AMREX_HIP_OR_CUDA(T y = __shfl_down(x, offset);, - T y = __shfl_down_sync(0xffffffff, x, offset); ) + T y = detail::multi_shuffle_down(x, offset); x = F()(x,y); } return x; diff --git a/Src/Base/AMReX_GpuTypes.H b/Src/Base/AMReX_GpuTypes.H index 737a47e665c..12b8fbc1829 100644 --- a/Src/Base/AMReX_GpuTypes.H +++ b/Src/Base/AMReX_GpuTypes.H @@ -8,7 +8,6 @@ #ifdef AMREX_USE_DPCPP #include -namespace sycl = cl::sycl; #endif namespace amrex { diff --git a/Src/Base/AMReX_MFIter.H b/Src/Base/AMReX_MFIter.H index eb259ac7b6d..9c01e38b138 100644 --- a/Src/Base/AMReX_MFIter.H +++ b/Src/Base/AMReX_MFIter.H @@ -164,6 +164,8 @@ public: static int allowMultipleMFIters (int allow); + void Finalize (); + protected: std::unique_ptr m_fa; //!< This must be the first member! @@ -180,6 +182,7 @@ protected: IndexType typ; bool dynamic; + bool finalized = false; struct DeviceSync { DeviceSync () = default; diff --git a/Src/Base/AMReX_MFIter.cpp b/Src/Base/AMReX_MFIter.cpp index e8a97256d3d..c761c466449 100644 --- a/Src/Base/AMReX_MFIter.cpp +++ b/Src/Base/AMReX_MFIter.cpp @@ -209,6 +209,19 @@ MFIter::MFIter (const FabArrayBase& fabarray_, const MFItInfo& info) MFIter::~MFIter () { + Finalize(); +} + +void +MFIter::Finalize () +{ + // avoid double finalize + if (finalized) return; + finalized = true; + + // mark as invalid + currentIndex = endIndex; + #ifdef AMREX_USE_OMP #pragma omp master #endif @@ -237,6 +250,9 @@ MFIter::~MFIter () #endif m_fa->clearThisBD(); } + if (m_fa) { + m_fa.reset(nullptr); + } } void diff --git a/Src/Base/AMReX_MPMD.H b/Src/Base/AMReX_MPMD.H new file mode 100644 index 00000000000..2b8ef399866 --- /dev/null +++ b/Src/Base/AMReX_MPMD.H @@ -0,0 +1,178 @@ +#ifndef AMREX_MPMD_H_ +#define AMREX_MPMD_H_ +#include + +#ifdef AMREX_USE_MPI + +#include + +#include + +namespace amrex { namespace MPMD { + +MPI_Comm Initialize (int argc, char* argv[]); + +void Finalize (); + +bool Initialized (); + +int MyProc (); //! Process ID in MPI_COMM_WORLD +int NProcs (); //! Number of processes in MPI_COMM_WORLD +int MyProgId (); //! 
Program ID + +class Copier +{ +public: + Copier (BoxArray const& ba, DistributionMapping const& dm); + + template + void send (FabArray const& fa, int icomp, int ncomp) const; + + template + void recv (FabArray& fa, int icomp, int ncomp) const; + +private: + std::map m_SndTags; + std::map m_RcvTags; +}; + +template +void Copier::send (FabArray const& mf, int icomp, int ncomp) const +{ + const int N_snds = m_SndTags.size(); + + if (N_snds == 0) return; + + // Prepare buffer + + Vector send_data; + Vector send_size; + Vector send_rank; + Vector send_reqs; + Vector send_cctc; + + Vector offset; + std::size_t total_volume = 0; + for (auto const& kv : m_SndTags) { + auto const& cctc = kv.second; + + std::size_t nbytes = 0; + for (auto const& cct : cctc) { + nbytes += cct.sbox.numPts() * ncomp * sizeof(typename FAB::value_type); + } + + std::size_t acd = ParallelDescriptor::alignof_comm_data(nbytes); + nbytes = amrex::aligned_size(acd, nbytes); // so that bytes are aligned + + // Also need to align the offset properly + total_volume = amrex::aligned_size(std::max(alignof(typename FAB::value_type), + acd), total_volume); + + offset.push_back(total_volume); + total_volume += nbytes; + + send_data.push_back(nullptr); + send_size.push_back(nbytes); + send_rank.push_back(kv.first); + send_reqs.push_back(MPI_REQUEST_NULL); + send_cctc.push_back(&cctc); + } + + Gpu::PinnedVector send_buffer(total_volume); + char* the_send_data = send_buffer.data(); + for (int i = 0; i < N_snds; ++i) { + send_data[i] = the_send_data + offset[i]; + } + + // Pack buffer +#ifdef AMREX_USE_GPU + if (Gpu::inLaunchRegion() && (mf.arena()->isDevice() || mf.arena()->isManaged())) { + mf.pack_send_buffer_gpu(mf, icomp, ncomp, send_data, send_size, send_cctc); + } else +#endif + { + mf.pack_send_buffer_cpu(mf, icomp, ncomp, send_data, send_size, send_cctc); + } + + // Send + for (int i = 0; i < N_snds; ++i) { + send_reqs[i] = ParallelDescriptor::Asend + (send_data[i], send_size[i], send_rank[i], 100, MPI_COMM_WORLD).req(); + } + Vector stats(N_snds); + ParallelDescriptor::Waitall(send_reqs, stats); +} + +template +void Copier::recv (FabArray& mf, int icomp, int ncomp) const +{ + const int N_rcvs = m_RcvTags.size(); + + if (N_rcvs == 0) return; + + // Prepare buffer + + Vector recv_data; + Vector recv_size; + Vector recv_from; + Vector recv_reqs; + + Vector offset; + std::size_t TotalRcvsVolume = 0; + for (auto const& kv : m_RcvTags) { + std::size_t nbytes = 0; + for (auto const& cct : kv.second) { + nbytes += cct.dbox.numPts() * ncomp * sizeof(typename FAB::value_type); + } + + std::size_t acd = ParallelDescriptor::alignof_comm_data(nbytes); + nbytes = amrex::aligned_size(acd, nbytes); // so that nbytes are aligned + + // Also need to align the offset properly + TotalRcvsVolume = amrex::aligned_size(std::max(alignof(typename FAB::value_type), + acd), TotalRcvsVolume); + + offset.push_back(TotalRcvsVolume); + TotalRcvsVolume += nbytes; + + recv_data.push_back(nullptr); + recv_size.push_back(nbytes); + recv_from.push_back(kv.first); + recv_reqs.push_back(MPI_REQUEST_NULL); + } + + Gpu::PinnedVector recv_buffer(TotalRcvsVolume); + char* the_recv_data = recv_buffer.data(); + + // Recv + for (int i = 0; i < N_rcvs; ++i) { + recv_data[i] = the_recv_data + offset[i]; + recv_reqs[i] = ParallelDescriptor::Arecv + (recv_data[i], recv_size[i], recv_from[i], 100, MPI_COMM_WORLD).req(); + } + + Vector recv_cctc(N_rcvs, nullptr); + for (int i = 0; i < N_rcvs; ++i) { + recv_cctc[i] = &(m_RcvTags.at(recv_from[i])); + } + + Vector stats(N_rcvs); 
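+    // Block until every receive posted above has completed; only then is it
+    // safe to unpack the buffers.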
+ ParallelDescriptor::Waitall(recv_reqs, stats); + + // Unpack buffer +#ifdef AMREX_USE_GPU + if (Gpu::inLaunchRegion() && (mf.arena()->isDevice() || mf.arena()->isManaged())) { + mf.unpack_recv_buffer_gpu(mf, icomp, ncomp, recv_data, recv_size, recv_cctc, + FabArrayBase::COPY, true); + } else +#endif + { + mf.unpack_recv_buffer_cpu(mf, icomp, ncomp, recv_data, recv_size, recv_cctc, + FabArrayBase::COPY, true); + } +} + +}} + +#endif +#endif diff --git a/Src/Base/AMReX_MPMD.cpp b/Src/Base/AMReX_MPMD.cpp new file mode 100644 index 00000000000..917c741c2a6 --- /dev/null +++ b/Src/Base/AMReX_MPMD.cpp @@ -0,0 +1,225 @@ +#include +#include + +#include +#include +#include +#include +#include + +#ifdef AMREX_USE_MPI + +namespace amrex { namespace MPMD { + +namespace { + bool initialized = false; + bool mpi_initialized_by_us = false; + MPI_Comm app_comm = MPI_COMM_NULL; + int myproc; + int nprocs; +} + +namespace { + +template +int num_unique_elements (std::vector& v) +{ + std::sort(v.begin(), v.end()); + auto last = std::unique(v.begin(), v.end()); + return last - v.begin(); +} + +} + +MPI_Comm Initialize (int argc, char* argv[]) +{ + initialized = true; + int flag; + MPI_Initialized(&flag); + if (!flag) { + MPI_Init(&argc, &argv); + mpi_initialized_by_us = true; + } + + MPI_Comm_rank(MPI_COMM_WORLD, &myproc); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs); + + int* p; + MPI_Comm_get_attr(MPI_COMM_WORLD, MPI_APPNUM, &p, &flag); + int appnum = *p; + + std::vector all_appnum(nprocs); + MPI_Allgather(&appnum, 1, MPI_INT, all_appnum.data(), 1, MPI_INT, MPI_COMM_WORLD); + int napps = num_unique_elements(all_appnum); + + // MPI_APPNUM does not appear to work with slurm on some systems. + if (napps != 2) { + std::vector all_argc(nprocs); + MPI_Allgather(&argc, 1, MPI_INT, all_argc.data(), 1, MPI_INT, MPI_COMM_WORLD); + napps = num_unique_elements(all_argc); + if (napps == 2) { + appnum = static_cast(argc != all_argc[0]); + } + } + + if (napps != 2) { + std::string exename; + if (argc > 0) { + exename = std::string(argv[0]); + } + unsigned long long hexe = std::hash{}(exename); + std::vector all_hexe(nprocs); + MPI_Allgather(&hexe, 1, MPI_UNSIGNED_LONG_LONG, + all_hexe.data(), 1, MPI_UNSIGNED_LONG_LONG, MPI_COMM_WORLD); + napps = num_unique_elements(all_hexe); + if (napps == 2) { + appnum = static_cast(hexe != all_hexe[0]); + } + } + + if (napps == 2) { + MPI_Comm_split(MPI_COMM_WORLD, appnum, myproc, &app_comm); + } else { + std::cout << "amrex::MPMD only supports two programs." << std::endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + + return app_comm; +} + +void Finalize () +{ + MPI_Comm_free(&app_comm); + if (mpi_initialized_by_us) { + MPI_Finalize(); + mpi_initialized_by_us = false; + } + initialized = false; +} + +bool Initialized () { return initialized; } + +int MyProc () +{ + return myproc; +} + +int NProcs () +{ + return nprocs; +} + +int MyProgId () +{ + return (myproc == ParallelDescriptor::MyProc()) ? 
0 : 1; +} + +Copier::Copier (BoxArray const& ba, DistributionMapping const& dm) +{ + int rank_offset = myproc - ParallelDescriptor::MyProc(); + int this_root, other_root; + if (rank_offset == 0) { // First program + this_root = 0; + other_root = ParallelDescriptor::NProcs(); + } else { + this_root = rank_offset; + other_root = 0; + } + + Vector bv = ba.boxList().data(); + + int this_nboxes = ba.size(); + Vector procs = dm.ProcessorMap(); + if (rank_offset != 0) { + for (int i = 0; i < this_nboxes; ++i) { + procs[i] += rank_offset; + } + } + + Vector obv; + Vector oprocs; + int other_nboxes; + if (myproc == this_root) { + if (rank_offset == 0) // the first program + { + MPI_Send(&this_nboxes, 1, MPI_INT, other_root, 0, MPI_COMM_WORLD); + MPI_Recv(&other_nboxes, 1, MPI_INT, other_root, 1, MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + obv.resize(other_nboxes); + MPI_Send(bv.data(), this_nboxes, + ParallelDescriptor::Mpi_typemap::type(), + other_root, 2, MPI_COMM_WORLD); + MPI_Recv(obv.data(), other_nboxes, + ParallelDescriptor::Mpi_typemap::type(), + other_root, 3, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + oprocs.resize(other_nboxes); + MPI_Send(procs.data(), this_nboxes, MPI_INT, other_root, 4, MPI_COMM_WORLD); + MPI_Recv(oprocs.data(), other_nboxes, MPI_INT, other_root, 5, MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + } + else // the second program + { + MPI_Recv(&other_nboxes, 1, MPI_INT, other_root, 0, MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + MPI_Send(&this_nboxes, 1, MPI_INT, other_root, 1, MPI_COMM_WORLD); + obv.resize(other_nboxes); + MPI_Recv(obv.data(), other_nboxes, + ParallelDescriptor::Mpi_typemap::type(), + other_root, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + MPI_Send(bv.data(), this_nboxes, + ParallelDescriptor::Mpi_typemap::type(), + other_root, 3, MPI_COMM_WORLD); + oprocs.resize(other_nboxes); + MPI_Recv(oprocs.data(), other_nboxes, MPI_INT, other_root, 4, MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + MPI_Send(procs.data(), this_nboxes, MPI_INT, other_root, 5, MPI_COMM_WORLD); + } + } + + ParallelDescriptor::Bcast(&other_nboxes, 1); + if (obv.empty()) { + obv.resize(other_nboxes); + oprocs.resize(other_nboxes); + } + ParallelDescriptor::Bcast(obv.data(), obv.size()); + ParallelDescriptor::Bcast(oprocs.data(), oprocs.size()); + + BoxArray oba(BoxList(std::move(obv))); + + // At this point, ba and bv hold our boxes, and oba holds the other + // program's boxes. procs holds mpi ranks of our boxes, and oprocs holds + // mpi ranks of the other program's boxes. All mpi ranks are in + // MPI_COMM_WORLD. 
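+    // For orientation, a minimal MPMD driver could look like the following
+    // sketch (hypothetical application code; ba, dm and mf are assumed to be
+    // built consistently by each of the two coupled programs):
+    //
+    //     MPI_Comm comm = amrex::MPMD::Initialize(argc, argv);
+    //     amrex::Initialize(argc, argv, true, comm);
+    //     {
+    //         amrex::MPMD::Copier copier(ba, dm);
+    //         if (amrex::MPMD::MyProgId() == 0) { copier.send(mf, 0, mf.nComp()); }
+    //         else                              { copier.recv(mf, 0, mf.nComp()); }
+    //     }
+    //     amrex::Finalize();
+    //     amrex::MPMD::Finalize();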
+ + // Build communication meta-data + + AMREX_ALWAYS_ASSERT(ba.ixType().cellCentered()); + + std::vector<std::pair<int,Box> > isects; + + for (int i = 0; i < this_nboxes; ++i) { + if (procs[i] == myproc) { + oba.intersections(bv[i], isects); + for (auto const& isec : isects) { + const int oi = isec.first; + const Box& bx = isec.second; + const int orank = oprocs[oi]; + m_SndTags[orank].push_back + (FabArrayBase::CopyComTag(bx, bx, oi, i)); + m_RcvTags[orank].push_back + (FabArrayBase::CopyComTag(bx, bx, i, oi)); + } + } + } + + for (auto& kv : m_SndTags) { + std::sort(kv.second.begin(), kv.second.end()); + } + for (auto& kv : m_RcvTags) { + std::sort(kv.second.begin(), kv.second.end()); + } +} + +}} + +#endif diff --git a/Src/Base/AMReX_Math.H b/Src/Base/AMReX_Math.H index 7996830d534..3eed941fb00 100644 --- a/Src/Base/AMReX_Math.H +++ b/Src/Base/AMReX_Math.H @@ -9,7 +9,6 @@ #ifdef AMREX_USE_DPCPP #include -namespace sycl = cl::sycl; #endif namespace amrex { inline namespace disabled { diff --git a/Src/Base/AMReX_MultiFab.H b/Src/Base/AMReX_MultiFab.H index dfb75dacbf9..70e6facaee7 100644 --- a/Src/Base/AMReX_MultiFab.H +++ b/Src/Base/AMReX_MultiFab.H @@ -8,6 +8,7 @@ #include #include #include +#include #ifdef AMREX_USE_EB #include @@ -190,7 +191,7 @@ public: /** * \brief Returns the maximum *absolute* values contained in - * each component of "comps" of the MultiFab. No ghost cells are used. + * each component of "comps" of the MultiFab. "nghost" ghost cells are used. */ Vector<Real> norm0 (const Vector<int>& comps, int nghost = 0, bool local = false, bool ignore_covered = false ) const; Vector<Real> norminf (const Vector<int>& comps, int nghost = 0, bool local = false, bool ignore_covered = false) const { @@ -232,6 +233,13 @@ public: */ Real sum (int comp = 0, bool local = false) const; /** + * \brief Same as sum with local=false, but for non-cell-centered data, this + * skips non-unique points that are owned by multiple boxes. + */ + Real sum_unique (int comp = 0, + bool local = false, + const Periodicity& period = Periodicity::NonPeriodic()) const; + /** * \brief Adds the scalar value val to the value of each cell in the * specified subregion of the MultiFab. The subregion consists * of the num_comp components starting at component comp. diff --git a/Src/Base/AMReX_MultiFab.cpp b/Src/Base/AMReX_MultiFab.cpp index 9e2f37adf37..83664b307d4 100644 --- a/Src/Base/AMReX_MultiFab.cpp +++ b/Src/Base/AMReX_MultiFab.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #ifdef AMREX_MEM_PROFILING #include @@ -1586,6 +1587,58 @@ MultiFab::sum (int comp, bool local) const return sm; } +Real +MultiFab::sum_unique (int comp, + bool local, + const Periodicity& period) const +{ + BL_PROFILE("MultiFab::sum_unique()"); + + // no duplicately distributed points if cell centered + if (ixType().cellCentered()) + return this->sum(comp, local); + + // Owner is the grid with the lowest grid number containing the data + std::unique_ptr<iMultiFab> owner_mask = OwnerMask(period); + + Real sm = Real(0.0); +#ifdef AMREX_USE_GPU + if (Gpu::inLaunchRegion()) { + auto const& ma = this->const_arrays(); + auto const& msk = owner_mask->const_arrays(); + sm = ParReduce(TypeList<ReduceOpSum>{}, TypeList<Real>{}, *this, IntVect(0), + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept + -> GpuTuple<Real> + { + return msk[box_no](i,j,k) ?
ma[box_no](i,j,k,comp) : 0.0_rt; + }); + } else +#endif + { +#ifdef AMREX_USE_OMP +#pragma omp parallel if (!system::regtest_reduction) reduction(+:sm) +#endif + for (MFIter mfi(*this,true); mfi.isValid(); ++mfi) + { + Box const& bx = mfi.tilebox(); + Array4<Real const> const& a = this->const_array(mfi); + Array4<int const> const& msk = owner_mask->const_array(mfi); + Real tmp = 0.0_rt; + AMREX_LOOP_3D(bx, i, j, k, + { + tmp += msk(i,j,k) ? a(i,j,k,comp) : 0.0_rt; + }); + sm += tmp; // Do it this way so that it does not break regression tests. + } + } + + if (!local) { + ParallelAllReduce::Sum(sm, ParallelContext::CommunicatorSub()); + } + + return sm; +} + void MultiFab::minus (const MultiFab& mf, int strt_comp, int num_comp, int nghost) { diff --git a/Src/Base/AMReX_MultiFabUtil.H b/Src/Base/AMReX_MultiFabUtil.H index 1444bb90484..21f89c8ed6c 100644 --- a/Src/Base/AMReX_MultiFabUtil.H +++ b/Src/Base/AMReX_MultiFabUtil.H @@ -231,6 +231,35 @@ namespace amrex */ Gpu::HostVector<Real> sumToLine (MultiFab const& mf, int icomp, int ncomp, Box const& domain, int direction, bool local = false); + + /** \brief Volume weighted sum for a vector of MultiFabs + * + * Return a volume weighted sum of MultiFabs of AMR data. The sum is + * performed on a single component of the data. If the MultiFabs are + * built with EB Factories, the cut cell volume fraction will be + * included in the weight. + */ + Real volumeWeightedSum (Vector<MultiFab const*> const& mf, int icomp, + Vector<Geometry> const& geom, + Vector<IntVect> const& ratio, + bool local = false); + + /** + * \brief Fourth-order interpolation from fine to coarse level. + * + * This is for high-order "average-down" of finite-difference data. If + * ghost cell data are used, it's the caller's responsibility to fill + * the ghost cells before calling this function. + * + * \param cmf coarse data + * \param scomp starting component + * \param ncomp number of components + * \param fmf fine data + * \param ratio refinement ratio. + */ + void FourthOrderInterpFromFineToCoarse (MultiFab& cmf, int scomp, int ncomp, + MultiFab const& fmf, + IntVect const& ratio); } namespace amrex { diff --git a/Src/Base/AMReX_MultiFabUtil.cpp b/Src/Base/AMReX_MultiFabUtil.cpp index 26a7242e89d..3ae4aa91b9f 100644 --- a/Src/Base/AMReX_MultiFabUtil.cpp +++ b/Src/Base/AMReX_MultiFabUtil.cpp @@ -1226,4 +1226,245 @@ namespace amrex } return hv; } + + Real volumeWeightedSum (Vector<MultiFab const*> const& mf, int icomp, + Vector<Geometry> const& geom, + Vector<IntVect> const& ratio, + bool local) + { + ReduceOps<ReduceOpSum> reduce_op; + ReduceData<Real> reduce_data(reduce_op); + +#ifdef AMREX_USE_EB + bool has_eb = !(mf[0]->isAllRegular()); +#endif + + int nlevels = mf.size(); + for (int ilev = 0; ilev < nlevels-1; ++ilev) { + iMultiFab mask = makeFineMask(*mf[ilev], *mf[ilev+1], IntVect(0), + ratio[ilev],Periodicity::NonPeriodic(), + 0, 1); + auto const& m = mask.const_arrays(); + auto const& a = mf[ilev]->const_arrays(); + auto const dx = geom[ilev].CellSizeArray(); + Real dv = AMREX_D_TERM(dx[0],*dx[1],*dx[2]); +#ifdef AMREX_USE_EB + if (has_eb) { + AMREX_ASSERT(mf[ilev]->hasEBFabFactory()); + auto const& f = dynamic_cast<EBFArrayBoxFactory const&> + (mf[ilev]->Factory()); + auto const& vfrac = f.getVolFrac(); + auto const& va = vfrac.const_arrays(); + reduce_op.eval(*mf[ilev], IntVect(0), reduce_data, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept + -> Real + { + return m[box_no](i,j,k) ? Real(0.)
+ : dv*a[box_no](i,j,k,icomp)*va[box_no](i,j,k); + }); + } else +#endif + { +#if (AMREX_SPACEDIM == 1) + if (geom[ilev].IsSPHERICAL()) { + const auto rlo = geom[ilev].CellSize(0); + reduce_op.eval(*mf[ilev], IntVect(0), reduce_data, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) + noexcept -> Real + { + if (m[box_no](i,j,k)) { + return Real(0.); + } else { + constexpr Real pi = Real(3.1415926535897932); + Real ri = rlo + dx[0]*i; + Real ro = ri + dx[0]; + return Real(4./3.)*pi*(ro-ri)*(ro*ro+ro*ri+ri*ri) + * a[box_no](i,j,k,icomp); + } + }); + } else +#elif (AMREX_SPACEDIM == 2) + if (geom[ilev].IsRZ()) { + const auto rlo = geom[ilev].CellSize(0); + reduce_op.eval(*mf[ilev], IntVect(0), reduce_data, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) + noexcept -> Real + { + if (m[box_no](i,j,k)) { + return Real(0.); + } else { + Real ri = rlo + dx[0]*i; + Real ro = ri + dx[0]; + constexpr Real pi = Real(3.1415926535897932); + return pi*dx[1]*dx[0]*(ro+ri) + * a[box_no](i,j,k,icomp); + } + }); + } else +#endif + { + reduce_op.eval(*mf[ilev], IntVect(0), reduce_data, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) + noexcept -> Real + { + return m[box_no](i,j,k) ? Real(0.) + : dv*a[box_no](i,j,k,icomp); + }); + } + } + Gpu::streamSynchronize(); + } + + auto const& a = mf.back()->const_arrays(); + auto const dx = geom[nlevels-1].CellSizeArray(); + Real dv = AMREX_D_TERM(dx[0],*dx[1],*dx[2]); +#ifdef AMREX_USE_EB + if (has_eb) { + AMREX_ASSERT(mf.back()->hasEBFabFactory()); + auto const& f = dynamic_cast + (mf.back()->Factory()); + auto const& vfrac = f.getVolFrac(); + auto const& va = vfrac.const_arrays(); + reduce_op.eval(*mf.back(), IntVect(0), reduce_data, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept + -> Real + { + return dv*a[box_no](i,j,k,icomp)*va[box_no](i,j,k); + }); + } else +#endif + { +#if (AMREX_SPACEDIM == 1) + if (geom[nlevels-1].IsSPHERICAL()) { + const auto rlo = geom[nlevels-1].CellSize(0); + reduce_op.eval(*mf.back(), IntVect(0), reduce_data, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) + noexcept -> Real + { + constexpr Real pi = Real(3.1415926535897932); + Real ri = rlo + dx[0]*i; + Real ro = ri + dx[0]; + return Real(4./3.)*pi*(ro-ri)*(ro*ro+ro*ri+ri*ri) + * a[box_no](i,j,k,icomp); + }); + } else +#elif (AMREX_SPACEDIM == 2) + if (geom[nlevels-1].IsRZ()) { + const auto rlo = geom[nlevels-1].CellSize(0); + reduce_op.eval(*mf.back(), IntVect(0), reduce_data, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) + noexcept -> Real + { + Real ri = rlo + dx[0]*i; + Real ro = ri + dx[0]; + constexpr Real pi = Real(3.1415926535897932); + return pi*dx[1]*dx[0]*(ro+ri) + * a[box_no](i,j,k,icomp); + }); + } else +#endif + { + reduce_op.eval(*mf.back(), IntVect(0), reduce_data, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept + { + return dv*a[box_no](i,j,k,icomp); + }); + } + } + + auto const& hv = reduce_data.value(reduce_op); + Real r = amrex::get<0>(hv); + + if (!local) { + ParallelAllReduce::Sum(r, ParallelContext::CommunicatorSub()); + } + return r; + } + + void FourthOrderInterpFromFineToCoarse (MultiFab& cmf, int scomp, int ncomp, + MultiFab const& fmf, + IntVect const& ratio) + { + AMREX_ASSERT(AMREX_D_TERM( (ratio[0] == 2 || ratio[0] == 4), + && (ratio[1] == 2 || ratio[1] == 4), + && (ratio[2] == 2 || ratio[2] == 4))); + + MultiFab tmp(amrex::coarsen(fmf.boxArray(), ratio), fmf.DistributionMap(), + ncomp, 0); + +#ifdef AMREX_USE_OMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + { 
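+        // Scratch FABs for the direction-by-direction interpolation passes;
+        // declared inside the parallel region so that each OpenMP thread
+        // gets its own copy.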
+#if (AMREX_SPACEDIM > 1) + FArrayBox xtmp; +#if (AMREX_SPACEDIM > 2) + FArrayBox ytmp; +#endif +#endif + for (MFIter mfi(tmp,TilingIfNotGPU()); mfi.isValid(); ++mfi) { + Box const& bx = mfi.tilebox(); + auto const& fa = fmf.const_array(mfi,scomp); + + Box xbx = bx; +#if (AMREX_SPACEDIM == 1) + auto const& xa = tmp.array(mfi); +#else + xbx.refine(IntVect(AMREX_D_DECL(1,ratio[1],ratio[2]))); + if (ratio[1] == 2) { xbx.grow(1,1); } +#if (AMREX_SPACEDIM == 3) + if (ratio[2] == 2) { xbx.grow(2,1); } +#endif + xtmp.resize(xbx,ncomp); + Elixir eli = xtmp.elixir(); + auto const& xa = xtmp.array(); +#endif + AMREX_HOST_DEVICE_PARALLEL_FOR_4D(xbx, ncomp, i, j, k, n, + { + int ii = 2*i; + xa(i,j,k,n) = Real(1./16)*(Real(9.)*(fa(ii ,j,k,n) + + fa(ii+1,j,k,n)) + - fa(ii-1,j,k,n) + - fa(ii+2,j,k,n)); + }); + +#if (AMREX_SPACEDIM > 1) + Box ybx = bx; + auto const& xca = xtmp.const_array(); +#if (AMREX_SPACEDIM == 2) + auto const& ya = tmp.array(mfi); +#else + ybx.refine(IntVect(AMREX_D_DECL(1,1,ratio[2]))); + if (ratio[2] == 2) { ybx.grow(2,1); } + ytmp.resize(ybx,ncomp); + eli.append(ytmp.elixir()); + auto const& ya = ytmp.array(); +#endif + AMREX_HOST_DEVICE_PARALLEL_FOR_4D(ybx, ncomp, i, j, k, n, + { + int jj = 2*j; + ya(i,j,k,n) = Real(1./16)*(Real(9.)*(xca(i,jj ,k,n) + + xca(i,jj+1,k,n)) + - xca(i,jj-1,k,n) + - xca(i,jj+2,k,n)); + }); + +#if (AMREX_SPACEDIM == 3) + auto const& yca = ytmp.const_array(); + auto const& ca = tmp.array(mfi); + AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx, ncomp, i, j, k, n, + { + int kk = 2*k; + ca(i,j,k,n) = Real(1./16)*(Real(9.)*(yca(i,j,kk ,n) + + yca(i,j,kk+1,n)) + - yca(i,j,kk-1,n) + - yca(i,j,kk+2,n)); + }); +#endif +#endif + } + } + + cmf.ParallelCopy(tmp, 0, scomp, ncomp); + } } diff --git a/Src/Base/AMReX_NonLocalBC.H b/Src/Base/AMReX_NonLocalBC.H index 7613a35de5b..fd534685a7b 100644 --- a/Src/Base/AMReX_NonLocalBC.H +++ b/Src/Base/AMReX_NonLocalBC.H @@ -1038,4 +1038,13 @@ FillPolar (FabArray& mf, Box const& domain); #include +namespace amrex { + using NonLocalBC::ParallelCopy; + using NonLocalBC::ParallelCopy_nowait; + using NonLocalBC::ParallelCopy_finish; + using NonLocalBC::MultiBlockIndexMapping; + using NonLocalBC::MultiBlockCommMetaData; + using NonLocalBC::CommHandler; +} + #endif diff --git a/Src/Base/AMReX_Orientation.H b/Src/Base/AMReX_Orientation.H index 064344cafd4..de9c54a1b6c 100644 --- a/Src/Base/AMReX_Orientation.H +++ b/Src/Base/AMReX_Orientation.H @@ -75,7 +75,7 @@ public: * according to the above ordering. */ AMREX_GPU_HOST_DEVICE - operator int () const noexcept { return val; } + constexpr operator int () const noexcept { return val; } //! Return opposite orientation. AMREX_GPU_HOST_DEVICE Orientation flip () const noexcept @@ -97,6 +97,30 @@ public: //! Read from an istream. friend std::istream& operator>> (std::istream& os, Orientation& o); + //! Int value of the x-lo-face + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static constexpr int xlo () noexcept { return 0; } + + //! Int value of the x-hi-face + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static constexpr int xhi () noexcept { return AMREX_SPACEDIM; } + + //! Int value of the y-lo-face + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static constexpr int ylo () noexcept { return 1; } + + //! Int value of the y-hi-face + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static constexpr int yhi () noexcept { return 1+AMREX_SPACEDIM; } + + //! Int value of the z-lo-face + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static constexpr int zlo () noexcept { return 2; } + + //! 
Int value of the z-hi-face + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static constexpr int zhi () noexcept { return 2+AMREX_SPACEDIM; } + private: //! Used internally. AMREX_GPU_HOST_DEVICE diff --git a/Src/Base/AMReX_PODVector.H b/Src/Base/AMReX_PODVector.H index 7217b4e814e..bfae2c01627 100644 --- a/Src/Base/AMReX_PODVector.H +++ b/Src/Base/AMReX_PODVector.H @@ -608,7 +608,10 @@ namespace amrex void AllocateBuffer (size_type a_capacity) noexcept { pointer new_data = allocate(a_capacity); - if (m_data) detail::memCopyImpl(new_data, m_data, size() * sizeof(T), *this); + if (m_data) { + detail::memCopyImpl(new_data, m_data, size() * sizeof(T), *this); + amrex::Gpu::streamSynchronize(); + } deallocate(m_data, capacity()); m_data = new_data; m_capacity = a_capacity; @@ -621,9 +624,10 @@ namespace amrex pointer new_data = allocate(a_capacity); if (m_data) { - memCopyImpl(new_data, m_data, a_index * sizeof(T), *this); + memCopyImpl(new_data, m_data, a_index * sizeof(T), *this); memCopyImpl(new_data + a_index + a_count, m_data + a_index, (size() - a_index)*sizeof(T), *this); + amrex::Gpu::streamSynchronize(); } deallocate(m_data, capacity()); m_data = new_data; diff --git a/Src/Base/AMReX_ParallelDescriptor.H b/Src/Base/AMReX_ParallelDescriptor.H index 38cd4cdf167..03c431d135a 100644 --- a/Src/Base/AMReX_ParallelDescriptor.H +++ b/Src/Base/AMReX_ParallelDescriptor.H @@ -12,6 +12,7 @@ #include #include #include +#include #ifndef BL_AMRPROF #include @@ -211,6 +212,11 @@ while ( false ) extern AMREX_EXPORT MPI_Comm m_comm; inline MPI_Comm Communicator () noexcept { return m_comm; } +#ifdef AMREX_USE_MPI + extern Vector m_mpi_types; + extern Vector m_mpi_ops; +#endif + //! return the number of MPI ranks local to the current Parallel Context inline int NProcs () noexcept @@ -1479,6 +1485,73 @@ void DoReduce (T* r, MPI_Op op, int cnt, int cpu) #endif } +#ifdef AMREX_USE_MPI +namespace ParallelDescriptor { + +template +struct Mpi_typemap> +{ + static MPI_Datatype type () + { + static MPI_Datatype mpi_type = MPI_DATATYPE_NULL; + if (mpi_type == MPI_DATATYPE_NULL) { + using T = ValLocPair; + static_assert(std::is_trivially_copyable::value, + "To communicate with MPI, ValLocPair must be trivially copyable."); + static_assert(std::is_standard_layout::value, + "To communicate with MPI, ValLocPair must be standard layout"); + + T vlp[2]; + MPI_Datatype types[] = { + Mpi_typemap::type(), + Mpi_typemap::type(), + }; + int blocklens[] = { 1, 1 }; + MPI_Aint disp[2]; + BL_MPI_REQUIRE( MPI_Get_address(&vlp[0].value, &disp[0]) ); + BL_MPI_REQUIRE( MPI_Get_address(&vlp[0].index, &disp[1]) ); + disp[1] -= disp[0]; + disp[0] = 0; + BL_MPI_REQUIRE( MPI_Type_create_struct(2, blocklens, disp, types, + &mpi_type) ); + MPI_Aint lb, extent; + BL_MPI_REQUIRE( MPI_Type_get_extent(mpi_type, &lb, &extent) ); + if (extent != sizeof(T)) { + MPI_Datatype tmp = mpi_type; + BL_MPI_REQUIRE( MPI_Type_create_resized(tmp, 0, sizeof(vlp[0]), &mpi_type) ); + BL_MPI_REQUIRE( MPI_Type_free(&tmp) ); + } + BL_MPI_REQUIRE( MPI_Type_commit( &mpi_type ) ); + + m_mpi_types.push_back(&mpi_type); + } + return mpi_type; + } +}; + +template +MPI_Op Mpi_op () +{ + static MPI_Op mpi_op = MPI_OP_NULL; + if (mpi_op == MPI_OP_NULL) { + static auto user_fn = [] (void *invec, void *inoutvec, int* len, + MPI_Datatype * /*datatype*/) + { + auto in = static_cast(invec); + auto out = static_cast(inoutvec); + for (int i = 0; i < *len; ++i) { + out[i] = F()(in[i],out[i]); + } + }; + BL_MPI_REQUIRE( MPI_Op_create(user_fn, 1, &mpi_op) ); + 
m_mpi_ops.push_back(&mpi_op); + } + return mpi_op; +} + +} +#endif + } #endif /*BL_PARALLELDESCRIPTOR_H*/ diff --git a/Src/Base/AMReX_ParallelDescriptor.cpp b/Src/Base/AMReX_ParallelDescriptor.cpp index 6d457d28398..3ea202d9b50 100644 --- a/Src/Base/AMReX_ParallelDescriptor.cpp +++ b/Src/Base/AMReX_ParallelDescriptor.cpp @@ -65,6 +65,11 @@ namespace amrex { namespace ParallelDescriptor { MPI_Comm m_comm = MPI_COMM_NULL; // communicator for all ranks, probably MPI_COMM_WORLD +#ifdef AMREX_USE_MPI + Vector m_mpi_types; + Vector m_mpi_ops; +#endif + int m_MinTag = 1000, m_MaxTag = -1; const int ioProcessor = 0; @@ -357,10 +362,20 @@ EndParallel () BL_MPI_REQUIRE( MPI_Type_free(&mpi_type_indextype) ); BL_MPI_REQUIRE( MPI_Type_free(&mpi_type_box) ); BL_MPI_REQUIRE( MPI_Type_free(&mpi_type_lull_t) ); + for (auto t : m_mpi_types) { + BL_MPI_REQUIRE( MPI_Type_free(t) ); + *t = MPI_DATATYPE_NULL; + } + for (auto op : m_mpi_ops) { + BL_MPI_REQUIRE( MPI_Op_free(op) ); + *op = MPI_OP_NULL; + } mpi_type_intvect = MPI_DATATYPE_NULL; mpi_type_indextype = MPI_DATATYPE_NULL; mpi_type_box = MPI_DATATYPE_NULL; mpi_type_lull_t = MPI_DATATYPE_NULL; + m_mpi_types.clear(); + m_mpi_ops.clear(); } if (!call_mpi_finalize) { diff --git a/Src/Base/AMReX_ParallelReduce.H b/Src/Base/AMReX_ParallelReduce.H index e0e1e98b66e..3a6db500a2a 100644 --- a/Src/Base/AMReX_ParallelReduce.H +++ b/Src/Base/AMReX_ParallelReduce.H @@ -3,6 +3,7 @@ #include #include +#include #include #include #include @@ -120,6 +121,32 @@ namespace ParallelGather { namespace ParallelAllReduce { + template + void Max (ValLocPair& vi, MPI_Comm comm) { +#ifdef AMREX_USE_MPI + auto tmp = vi; + using T = ValLocPair; + MPI_Allreduce(&tmp, &vi, 1, + ParallelDescriptor::Mpi_typemap::type(), + ParallelDescriptor::Mpi_op>(), comm); +#else + amrex::ignore_unused(vi, comm); +#endif + } + + template + void Min (ValLocPair& vi, MPI_Comm comm) { +#ifdef AMREX_USE_MPI + auto tmp = vi; + using T = ValLocPair; + MPI_Allreduce(&tmp, &vi, 1, + ParallelDescriptor::Mpi_typemap::type(), + ParallelDescriptor::Mpi_op>(), comm); +#else + amrex::ignore_unused(vi, comm); +#endif + } + template void Max (T& v, MPI_Comm comm) { detail::Reduce(detail::ReduceOp::max, v, -1, comm); @@ -174,6 +201,34 @@ namespace ParallelAllReduce { namespace ParallelReduce { + template + void Max (ValLocPair& vi, int root, MPI_Comm comm) { +#ifdef AMREX_USE_MPI + auto tmp = vi; + using T = ValLocPair; + MPI_Reduce(&tmp, &vi, 1, + ParallelDescriptor::Mpi_typemap::type(), + ParallelDescriptor::Mpi_op>(), + root, comm); +#else + amrex::ignore_unused(vi, root, comm); +#endif + } + + template + void Min (ValLocPair& vi, int root, MPI_Comm comm) { +#ifdef AMREX_USE_MPI + auto tmp = vi; + using T = ValLocPair; + MPI_Reduce(&tmp, &vi, 1, + ParallelDescriptor::Mpi_typemap::type(), + ParallelDescriptor::Mpi_op>(), + root, comm); +#else + amrex::ignore_unused(vi, root, comm); +#endif + } + template void Max (T& v, int root, MPI_Comm comm) { detail::Reduce(detail::ReduceOp::max, v, root, comm); diff --git a/Src/Base/AMReX_ParmParse.H b/Src/Base/AMReX_ParmParse.H index 6555ee5aec0..504aaa4f256 100644 --- a/Src/Base/AMReX_ParmParse.H +++ b/Src/Base/AMReX_ParmParse.H @@ -554,7 +554,7 @@ public: const std::string& val); //! keyword for files to load - static std::string FileKeyword; + static std::string const FileKeyword; //! Add keys and values from a file to the end of the PP table. 
static void addfile (std::string const filename); diff --git a/Src/Base/AMReX_ParmParse.cpp b/Src/Base/AMReX_ParmParse.cpp index 79e80fbb8bd..253ad0e37e0 100644 --- a/Src/Base/AMReX_ParmParse.cpp +++ b/Src/Base/AMReX_ParmParse.cpp @@ -34,7 +34,7 @@ static bool finalize_verbose = false; static bool finalize_verbose = true; #endif -std::string ParmParse::FileKeyword = "FILE"; +std::string const ParmParse::FileKeyword = "FILE"; // // Used by constructor to build table. // @@ -609,7 +609,8 @@ addDefn (std::string& def, tab.push_back(ParmParse::PP_entry(def,val)); } val.clear(); - def = std::string(); + if ( def != ParmParse::FileKeyword ) + def = std::string(); } void @@ -991,7 +992,8 @@ ParmParse::prefixedName (const std::string& str) const void ParmParse::addfile (std::string const filename) { auto l = std::list<std::string>{filename}; - addDefn(FileKeyword, + auto file = FileKeyword; + addDefn(file, l, g_table); } diff --git a/Src/Base/AMReX_RandomEngine.H b/Src/Base/AMReX_RandomEngine.H index a639e4731d7..967b9e66569 100644 --- a/Src/Base/AMReX_RandomEngine.H +++ b/Src/Base/AMReX_RandomEngine.H @@ -15,7 +15,6 @@ #include #elif defined(AMREX_USE_DPCPP) #include -namespace sycl = cl::sycl; #include namespace mkl = oneapi::mkl; #endif diff --git a/Src/Base/AMReX_Reduce.H b/Src/Base/AMReX_Reduce.H index 9c07b7b4a2a..9076e984828 100644 --- a/Src/Base/AMReX_Reduce.H +++ b/Src/Base/AMReX_Reduce.H @@ -6,9 +6,11 @@ #include #include #include +#include #include #include +#include namespace amrex { @@ -133,7 +135,12 @@ struct ReduceOpMin void local_update (T& d, T const& s) const noexcept { d = amrex::min(d,s); } template <typename T> - constexpr void init (T& t) const noexcept { t = std::numeric_limits<T>::max(); } + constexpr std::enable_if_t<std::numeric_limits<T>::is_specialized> + init (T& t) const noexcept { t = std::numeric_limits<T>::max(); } + + template <typename T> + constexpr std::enable_if_t<!std::numeric_limits<T>::is_specialized> + init (T& t) const noexcept { t = T::max(); } }; struct ReduceOpMax @@ -161,7 +168,12 @@ struct ReduceOpMax void local_update (T& d, T const& s) const noexcept { d = amrex::max(d,s); } template <typename T> - constexpr void init (T& t) const noexcept { t = std::numeric_limits<T>::lowest(); } + constexpr std::enable_if_t<std::numeric_limits<T>::is_specialized> + init (T& t) const noexcept { t = std::numeric_limits<T>::lowest(); } + + template <typename T> + constexpr std::enable_if_t<!std::numeric_limits<T>::is_specialized> + init (T& t) const noexcept { t = T::lowest(); } }; struct ReduceOpLogicalAnd @@ -899,7 +911,8 @@ bool AnyOf (Box const& box, P&& pred) } }); #else - AMREX_LAUNCH_KERNEL(ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), + AMREX_LAUNCH_KERNEL(AMREX_GPU_MAX_THREADS, ec.numBlocks, ec.numThreads, 0, + Gpu::gpuStream(), [=] AMREX_GPU_DEVICE () noexcept { __shared__ int has_any; if (threadIdx.x == 0) has_any = *dp; diff --git a/Src/Base/AMReX_RungeKutta.H b/Src/Base/AMReX_RungeKutta.H new file mode 100644 index 00000000000..b5e35f783c5 --- /dev/null +++ b/Src/Base/AMReX_RungeKutta.H @@ -0,0 +1,293 @@ +#ifndef AMREX_RUNGE_KUTTA_H_ +#define AMREX_RUNGE_KUTTA_H_ +#include + +#include + +namespace amrex { + +/** + * \brief Functions for Runge-Kutta methods + * + * This namespace RungeKutta has functions for a number of RK methods, RK2, RK3 + * and RK4. Here, RK2 refers to the explicit trapezoid rule, RK3 refers to + * the SSPRK3 + * (https://en.wikipedia.org/wiki/List_of_Runge%E2%80%93Kutta_methods#Third-order_Strong_Stability_Preserving_Runge-Kutta_(SSPRK3)), + * and RK4 is the classical fourth-order method + * (https://en.wikipedia.org/wiki/List_of_Runge%E2%80%93Kutta_methods#Classic_fourth-order_method).
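+ *
+ * As a quick illustration of the interface described below, a single RK2
+ * step for a MultiFab might look like this sketch, where compute_rhs and
+ * geom are assumed application-side objects:
+ *
+ *     RungeKutta::RK2(Sold, Snew, time, dt,
+ *         [&] (int stage, MultiFab& dudt, MultiFab const& u,
+ *              Real t, Real dtsub) { compute_rhs(dudt, u, t); },
+ *         [&] (int stage, MultiFab& u, Real t) {
+ *             u.FillBoundary(geom.periodicity()); });
+ *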
+ * The function templates take the old data in FabArray/MultiFab as input, + * and evolve the system for one time step. The result is stored in another + * FabArray/MultiFab. These two FabArrays must have ghost cells if they are + * needed for evaluating the right-hand side. The functions take three + * callable objects for computing the right-hand side, filling ghost cells, + * and optionally post-processing RK stage results. For RK3 and RK4, they + * also need a callable object for storing the data needed for filling + * coarse/fine boundaries in AMR simulations. + * + * The callable object for right-hand side has the signature of `void(int + * stage, MF& dudt, MF const& u, Real t, Real dt)`, where `stage` is the RK + * stage number starting from 1, `dudt` is the output, `u` is the input, `t` + * is the first-order approximate time of the stage, and `dt` is the + * sub-time step, which can be used for reflux operations in AMR + * simulations. + * + * The callable object for filling ghost cells has the signature of + * `void(int stage, MF& u, Real t)`, where `stage` is the RK stage number + * starting from 1, `u` is a FabArray/MultiFab whose ghost cells need to be + * filled, and `t` is the first-order approximate time of the data at that + * stage. The FillPatcher class can be useful for implementing such a + * callable. See AmrLevel::RK for an example. + * + * The callable object for post-processing stage results is optional. It's + * a no-op by default. Its function signature is `void(int stage, MF& u)`, + * where `stage` is the RK stage number and `u` is the result of that stage. + * + * For RK3 and RK4, one must also provide a callable object with the + * signature of `void(Array<MF,order> const& rkk)`, where `order` is the RK + * order and `rkk` contains the right-hand side at all the RK stages. The + * FillPatcher class can be useful for implementing such a callable. See + * AmrLevel::RK for an example. + */ +namespace RungeKutta { + +struct PostStageNoOp { + template <typename MF> + std::enable_if_t<IsFabArray<MF>::value> operator() (int, MF&) const {} +}; + +namespace detail { +//! Unew = Uold + dUdt * dt +template <typename MF> +void rk_update (MF& Unew, MF const& Uold, MF const& dUdt, Real dt) +{ + auto const& snew = Unew.arrays(); + auto const& sold = Uold.const_arrays(); + auto const& sdot = dUdt.const_arrays(); + amrex::ParallelFor(Unew, IntVect(0), Unew.nComp(), [=] AMREX_GPU_DEVICE + (int bi, int i, int j, int k, int n) noexcept + { + snew[bi](i,j,k,n) = sold[bi](i,j,k,n) + dt*sdot[bi](i,j,k,n); + }); + Gpu::streamSynchronize(); +} + +//! Unew = Uold + (dUdt1 + dUdt2) * dt +template <typename MF> +void rk_update (MF& Unew, MF const& Uold, MF const& dUdt1, MF const& dUdt2, Real dt) +{ + auto const& snew = Unew.arrays(); + auto const& sold = Uold.const_arrays(); + auto const& sdot1 = dUdt1.const_arrays(); + auto const& sdot2 = dUdt2.const_arrays(); + amrex::ParallelFor(Unew, IntVect(0), Unew.nComp(), [=] AMREX_GPU_DEVICE + (int bi, int i, int j, int k, int n) noexcept + { + snew[bi](i,j,k,n) = sold[bi](i,j,k,n) + dt*(sdot1[bi](i,j,k,n) + + sdot2[bi](i,j,k,n)); + }); + Gpu::streamSynchronize(); +} + +//!
Unew = (Uold+Unew)/2 + dUdt * dt/2 +template +void rk2_update_2 (MF& Unew, MF const& Uold, MF const& dUdt, Real dt) +{ + auto const& snew = Unew.arrays(); + auto const& sold = Uold.const_arrays(); + auto const& sdot = dUdt.const_arrays(); + amrex::ParallelFor(Unew, IntVect(0), Unew.nComp(), [=] AMREX_GPU_DEVICE + (int bi, int i, int j, int k, int n) noexcept + { + snew[bi](i,j,k,n) = Real(0.5)*(snew[bi](i,j,k,n) + + sold[bi](i,j,k,n) + + sdot[bi](i,j,k,n) * dt); + }); + Gpu::streamSynchronize(); +} + +//! Unew = Uold + (k1 + k2 + 4*k3) * dt6, where dt6 = dt/6 +template +void rk3_update_3 (MF& Unew, MF const& Uold, Array const& rkk, Real dt6) +{ + auto const& snew = Unew.arrays(); + auto const& sold = Uold.const_arrays(); + auto const& k1 = rkk[0].const_arrays(); + auto const& k2 = rkk[1].const_arrays(); + auto const& k3 = rkk[2].const_arrays(); + amrex::ParallelFor(Unew, IntVect(0), Unew.nComp(), [=] AMREX_GPU_DEVICE + (int bi, int i, int j, int k, int n) noexcept + { + snew[bi](i,j,k,n) = sold[bi](i,j,k,n) + + dt6 * (k1[bi](i,j,k,n) + k2[bi](i,j,k,n) + + Real(4.) * k3[bi](i,j,k,n)); + }); + Gpu::streamSynchronize(); +} + +//! Unew = Uold + (k1+k4+2*(k2+k3))*dt6, where dt6 = dt/6 +template +void rk4_update_4 (MF& Unew, MF const& Uold, Array const& rkk, Real dt6) +{ + auto const& snew = Unew.arrays(); + auto const& sold = Uold.const_arrays(); + auto const& k1 = rkk[0].const_arrays(); + auto const& k2 = rkk[1].const_arrays(); + auto const& k3 = rkk[2].const_arrays(); + auto const& k4 = rkk[3].const_arrays(); + amrex::ParallelFor(Unew, IntVect(0), Unew.nComp(), [=] AMREX_GPU_DEVICE + (int bi, int i, int j, int k, int n) noexcept + { + snew[bi](i,j,k,n) = sold[bi](i,j,k,n) + + dt6 * ( k1[bi](i,j,k,n) + k4[bi](i,j,k,n) + + Real(2.)*(k2[bi](i,j,k,n) + k3[bi](i,j,k,n))); + }); + Gpu::streamSynchronize(); +} +} + +/** + * \brief Time stepping with RK2 + * + * \param Uold input FabArray/MultiFab data at time + * \param Unew output FabArray/MultiFab data at time+dt + * \param time time at the beginning of the step + * \param dt time step + * \param frhs computing the right-hand side + * \param fillbndry filling ghost cells + * \param post_stage post-processing stage results + */ +template +void RK2 (MF& Uold, MF& Unew, Real time, Real dt, F&& frhs, FB&& fillbndry, + P&& post_stage = PostStageNoOp()) +{ + BL_PROFILE("RungeKutta2"); + + MF dUdt(Unew.boxArray(), Unew.DistributionMap(), Unew.nComp(), 0, + MFInfo(), Unew.Factory()); + + // RK2 stage 1 + fillbndry(1, Uold, time); + frhs(1, dUdt, Uold, time, Real(0.5)*dt); + // Unew = Uold + dt * dUdt + detail::rk_update(Unew, Uold, dUdt, dt); + post_stage(1, Unew); + + // RK2 stage 2 + fillbndry(2, Unew, time+dt); + frhs(2, dUdt, Unew, time, Real(0.5)*dt); + // Unew = (Uold+Unew)/2 + dUdt_2 * dt/2, + // which is Unew = Uold + dt/2 * (dUdt_1 + dUdt_2) + detail::rk2_update_2(Unew, Uold, dUdt, dt); + post_stage(2, Unew); +} + +/** + * \brief Time stepping with RK3 + * + * \param Uold input FabArray/MultiFab data at time + * \param Unew output FabArray/MultiFab data at time+dt + * \param time time at the beginning of the step + * \param dt time step + * \param frhs computing the right-hand side + * \param fillbndry filling ghost cells + * \param store_crse_data storing right-hand side data for AMR + * \param post_stage post-processing stage results + */ +template +void RK3 (MF& Uold, MF& Unew, Real time, Real dt, F&& frhs, FB&& fillbndry, + R&& store_crse_data, P&& post_stage = PostStageNoOp()) +{ + BL_PROFILE("RungeKutta3"); + + Array rkk; + for (auto& mf : 
rkk) { + mf.define(Unew.boxArray(), Unew.DistributionMap(), Unew.nComp(), 0, + MFInfo(), Unew.Factory()); + } + + // RK3 stage 1 + fillbndry(1, Uold, time); + frhs(1, rkk[0], Uold, time, dt/Real(6.)); + // Unew = Uold + k1 * dt + detail::rk_update(Unew, Uold, rkk[0], dt); + post_stage(1, Unew); + + // RK3 stage 2 + fillbndry(2, Unew, time+dt); + frhs(2, rkk[1], Unew, time+dt, dt/Real(6.)); + // Unew = Uold + (k1+k2) * dt/4 + detail::rk_update(Unew, Uold, rkk[0], rkk[1], Real(0.25)*dt); + post_stage(2, Unew); + + // RK3 stage 3 + Real t_half = time + Real(0.5)*dt; + fillbndry(3, Unew, t_half); + frhs(3, rkk[2], Unew, t_half, dt*Real(2./3.)); + // Unew = Uold + (k1/6 + k2/6 + k3*(2/3)) * dt + detail::rk3_update_3(Unew, Uold, rkk, Real(1./6.)*dt); + post_stage(3, Unew); + + store_crse_data(rkk); +} + +/** + * \brief Time stepping with RK4 + * + * \param Uold input FabArray/MultiFab data at time + * \param Unew output FabArray/MultiFab data at time+dt + * \param time time at the beginning of the step + * \param dt time step + * \param frhs computing the right-hand side + * \param fillbndry filling ghost cells + * \param store_crse_data storing right-hand side data for AMR + * \param post_stage post-processing stage results + */ +template +void RK4 (MF& Uold, MF& Unew, Real time, Real dt, F&& frhs, FB&& fillbndry, + R&& store_crse_data, P&& post_stage = PostStageNoOp()) +{ + BL_PROFILE("RungeKutta4"); + + Array rkk; + for (auto& mf : rkk) { + mf.define(Unew.boxArray(), Unew.DistributionMap(), Unew.nComp(), 0, + MFInfo(), Unew.Factory()); + } + + // RK4 stage 1 + fillbndry(1, Uold, time); + frhs(1, rkk[0], Uold, time, dt/Real(6.)); + // Unew = Uold + k1 * dt/2 + detail::rk_update(Unew, Uold, rkk[0], Real(0.5)*dt); + post_stage(1, Unew); + + // RK4 stage 2 + Real t_half = time + Real(0.5)*dt; + fillbndry(2, Unew, t_half); + frhs(2, rkk[1], Unew, t_half, dt/Real(3.)); + // Unew = Uold + k2 * dt/2 + detail::rk_update(Unew, Uold, rkk[1], Real(0.5)*dt); + post_stage(2, Unew); + + // RK4 stage 3 + fillbndry(3, Unew, t_half); + frhs(3, rkk[2], Unew, t_half, dt/Real(3.)); + // Unew = Uold + k3 * dt; + detail::rk_update(Unew, Uold, rkk[2], dt); + post_stage(3, Unew); + + // RK4 stage 4 + fillbndry(4, Unew, time+dt); + frhs(4, rkk[3], Unew, time+dt, dt/Real(6.)); + // Unew = Uold + (k1/6 + k2/3 + k3/3 + k4/6) * dt + detail::rk4_update_4(Unew, Uold, rkk, Real(1./6.)*dt); + post_stage(4, Unew); + + store_crse_data(rkk); +} + +}} + +#endif diff --git a/Src/Base/AMReX_Scan.H b/Src/Base/AMReX_Scan.H index 96aefb870b6..3dc5cb98f9a 100644 --- a/Src/Base/AMReX_Scan.H +++ b/Src/Base/AMReX_Scan.H @@ -197,7 +197,7 @@ T PrefixSum_mp (N n, FIN && fin, FOUT && fout, TYPE, RetSum a_ret_sum) amrex::launch(nblocks, nthreads, sm, stream, [=] AMREX_GPU_DEVICE (Gpu::Handler const& gh) noexcept { - amrex::oneapi::sub_group const& sg = gh.item->get_sub_group(); + sycl::sub_group const& sg = gh.item->get_sub_group(); int lane = sg.get_local_id()[0]; int warp = sg.get_group_id()[0]; int nwarps = sg.get_group_range()[0]; @@ -226,7 +226,7 @@ T PrefixSum_mp (N n, FIN && fin, FOUT && fout, TYPE, RetSum a_ret_sum) T x = x0; // Scan within a warp for (int i = 1; i <= Gpu::Device::warp_size; i *= 2) { - T s = sg.shuffle_up(x, i); + T s = sycl::shift_group_right(sg, x, i); if (lane >= i) x += s; } @@ -244,7 +244,7 @@ T PrefixSum_mp (N n, FIN && fin, FOUT && fout, TYPE, RetSum a_ret_sum) if (warp == 0) { T y = (lane < nwarps) ? 
shared[lane] : 0; for (int i = 1; i <= Gpu::Device::warp_size; i *= 2) { - T s = sg.shuffle_up(y, i); + T s = sycl::shift_group_right(sg, y, i); if (lane >= i) y += s; } @@ -277,7 +277,7 @@ T PrefixSum_mp (N n, FIN && fin, FOUT && fout, TYPE, RetSum a_ret_sum) amrex::launch(1, nthreads, sm, stream, [=] AMREX_GPU_DEVICE (Gpu::Handler const& gh) noexcept { - amrex::oneapi::sub_group const& sg = gh.item->get_sub_group(); + sycl::sub_group const& sg = gh.item->get_sub_group(); int lane = sg.get_local_id()[0]; int warp = sg.get_group_id()[0]; int nwarps = sg.get_group_range()[0]; @@ -293,7 +293,7 @@ T PrefixSum_mp (N n, FIN && fin, FOUT && fout, TYPE, RetSum a_ret_sum) T x = (offset < nblocks) ? blocksum_p[offset] : 0; // Scan within a warp for (int i = 1; i <= Gpu::Device::warp_size; i *= 2) { - T s = sg.shuffle_up(x, i); + T s = sycl::shift_group_right(sg, x, i); if (lane >= i) x += s; } @@ -311,7 +311,7 @@ T PrefixSum_mp (N n, FIN && fin, FOUT && fout, TYPE, RetSum a_ret_sum) if (warp == 0) { T y = (lane < nwarps) ? shared[lane] : 0; for (int i = 1; i <= Gpu::Device::warp_size; i *= 2) { - T s = sg.shuffle_up(y, i); + T s = sycl::shift_group_right(sg, y, i); if (lane >= i) y += s; } @@ -417,7 +417,7 @@ T PrefixSum (N n, FIN && fin, FOUT && fout, TYPE type, RetSum a_ret_sum = retSum amrex::launch(nblocks, nthreads, sm, stream, [=] AMREX_GPU_DEVICE (Gpu::Handler const& gh) noexcept { - amrex::oneapi::sub_group const& sg = gh.item->get_sub_group(); + sycl::sub_group const& sg = gh.item->get_sub_group(); int lane = sg.get_local_id()[0]; int warp = sg.get_group_id()[0]; int nwarps = sg.get_group_range()[0]; @@ -472,7 +472,7 @@ T PrefixSum (N n, FIN && fin, FOUT && fout, TYPE type, RetSum a_ret_sum = retSum T x = x0; // Scan within a warp for (int i = 1; i <= Gpu::Device::warp_size; i *= 2) { - T s = sg.shuffle_up(x, i); + T s = sycl::shift_group_right(sg, x, i); if (lane >= i) x += s; } @@ -490,7 +490,7 @@ T PrefixSum (N n, FIN && fin, FOUT && fout, TYPE type, RetSum a_ret_sum = retSum if (warp == 0) { T y = (lane < nwarps) ? shared[lane] : 0; for (int i = 1; i <= Gpu::Device::warp_size; i *= 2) { - T s = sg.shuffle_up(y, i); + T s = sycl::shift_group_right(sg, y, i); if (lane >= i) y += s; } @@ -543,7 +543,7 @@ T PrefixSum (N n, FIN && fin, FOUT && fout, TYPE type, RetSum a_ret_sum = retSum // implement our own __ballot unsigned status_bf = (stva.status == 'p') ? 
(0x1u << lane) : 0; for (int i = 1; i < Gpu::Device::warp_size; i *= 2) { - status_bf |= sg.shuffle_xor(status_bf, i); + status_bf |= sycl::permute_group_by_xor(sg, status_bf, i); } bool stop_lookback = status_bf & 0x1u; @@ -563,7 +563,7 @@ T PrefixSum (N n, FIN && fin, FOUT && fout, TYPE type, RetSum a_ret_sum = retSum } for (int i = Gpu::Device::warp_size/2; i > 0; i /= 2) { - x += sg.shuffle_down(x,i); + x += sycl::shift_group_left(sg, x,i); } } diff --git a/Src/Base/AMReX_TableData.H b/Src/Base/AMReX_TableData.H index e44758bde6d..f44157160a7 100644 --- a/Src/Base/AMReX_TableData.H +++ b/Src/Base/AMReX_TableData.H @@ -77,8 +77,8 @@ struct Table2D { T* AMREX_RESTRICT p = nullptr; Long jstride = 0; - GpuArray begin{1,1}; - GpuArray end{0,0}; + GpuArray begin{{1,1}}; + GpuArray end{{0,0}}; AMREX_GPU_HOST_DEVICE constexpr Table2D () noexcept {} @@ -142,8 +142,8 @@ struct Table3D T* AMREX_RESTRICT p = nullptr; Long jstride = 0; Long kstride = 0; - GpuArray begin{1,1,1}; - GpuArray end{0,0,0}; + GpuArray begin{{1,1,1}}; + GpuArray end{{0,0,0}}; AMREX_GPU_HOST_DEVICE constexpr Table3D () noexcept {} @@ -213,8 +213,8 @@ struct Table4D Long jstride = 0; Long kstride = 0; Long nstride = 0; - GpuArray begin{1,1,1,1}; - GpuArray end{0,0,0,0}; + GpuArray begin{{1,1,1,1}}; + GpuArray end{{0,0,0,0}}; AMREX_GPU_HOST_DEVICE constexpr Table4D () noexcept {} diff --git a/Src/Base/AMReX_TinyProfiler.H b/Src/Base/AMReX_TinyProfiler.H index 677b4448d3b..57c9ea0479c 100644 --- a/Src/Base/AMReX_TinyProfiler.H +++ b/Src/Base/AMReX_TinyProfiler.H @@ -10,7 +10,7 @@ #endif #if defined(AMREX_USE_HIP) && defined(AMREX_USE_ROCTX) -#include +#include #endif #include diff --git a/Src/Base/AMReX_ValLocPair.H b/Src/Base/AMReX_ValLocPair.H new file mode 100644 index 00000000000..b7b480b1dba --- /dev/null +++ b/Src/Base/AMReX_ValLocPair.H @@ -0,0 +1,35 @@ +#ifndef AMREX_VALLOCPAIR_H_ +#define AMREX_VALLOCPAIR_H_ + +#include + +namespace amrex { + +template +struct ValLocPair +{ + TV value; + TI index; + + static constexpr ValLocPair max () { + return ValLocPair{std::numeric_limits::max(), TI()}; + } + + static constexpr ValLocPair lowest () { + return ValLocPair{std::numeric_limits::lowest(), TI()}; + } + + friend constexpr bool operator< (ValLocPair const& a, ValLocPair const& b) + { + return a.value < b.value; + } + + friend constexpr bool operator> (ValLocPair const& a, ValLocPair const& b) + { + return a.value > b.value; + } +}; + +} + +#endif diff --git a/Src/Base/AMReX_VisMF.H b/Src/Base/AMReX_VisMF.H index 12777a08307..bfab54abf8d 100644 --- a/Src/Base/AMReX_VisMF.H +++ b/Src/Base/AMReX_VisMF.H @@ -638,7 +638,6 @@ Read (FabArray& fa, const std::string& name) } int totalioreqs = nboxes; - int messtotal = 0; int reqspending = 0; int iopfileindex; std::deque iopreads; @@ -669,7 +668,6 @@ Read (FabArray& fa, const std::string& name) } } else { ParallelDescriptor::Send(vreads, tryproc, readtag); - ++messtotal; ++reqspending; } availablefiles.erase(afilesiter); diff --git a/Src/Base/AMReX_bc_types_mod.F90 b/Src/Base/AMReX_bc_types_mod.F90 index c326d49e419..c1c6f237ba8 100644 --- a/Src/Base/AMReX_bc_types_mod.F90 +++ b/Src/Base/AMReX_bc_types_mod.F90 @@ -15,6 +15,9 @@ module amrex_bc_types_module integer, parameter, public :: amrex_bc_ext_dir = 3 integer, parameter, public :: amrex_bc_hoextrap = 4 integer, parameter, public :: amrex_bc_hoextrapcc = 5 + integer, parameter, public :: amrex_bc_user_1 = 1001 + integer, parameter, public :: amrex_bc_user_2 = 1002 + integer, parameter, public :: amrex_bc_user_3 = 1003 integer, 
parameter, public :: amrex_pbc_interior = 0 integer, parameter, public :: amrex_pbc_inflow = 1 diff --git a/Src/Base/CMakeLists.txt b/Src/Base/CMakeLists.txt index 6a2db4526cd..7af11a24b5a 100644 --- a/Src/Base/CMakeLists.txt +++ b/Src/Base/CMakeLists.txt @@ -30,6 +30,7 @@ target_sources( amrex AMReX_Utility.cpp AMReX_FileSystem.H AMReX_FileSystem.cpp + AMReX_ValLocPair.H AMReX_Reduce.H AMReX_Scan.H AMReX_Partition.H @@ -71,6 +72,7 @@ target_sources( amrex AMReX_DataAllocator.H AMReX_BLProfiler.H AMReX_BLBackTrace.H + AMReX_BLBackTrace.cpp AMReX_BLFort.H AMReX_NFiles.H AMReX_NFiles.cpp @@ -187,6 +189,7 @@ target_sources( amrex AMReX_IntegratorBase.H AMReX_RKIntegrator.H AMReX_TimeIntegrator.H + AMReX_RungeKutta.H # GPU -------------------------------------------------------------------- AMReX_Gpu.H AMReX_GpuQualifiers.H @@ -222,6 +225,7 @@ target_sources( amrex AMReX_MFParallelForC.H AMReX_MFParallelForG.H AMReX_TagParallelFor.H + AMReX_CTOParallelForImpl.H AMReX_ParReduce.H # CUDA -------------------------------------------------------------------- AMReX_CudaGraph.H @@ -231,8 +235,6 @@ target_sources( amrex # Memory pool ------------------------------------------------------------- AMReX_MemPool.cpp AMReX_MemPool.H - # Profiling --------------------------------------------------------------- - AMReX_BLBackTrace.cpp # Parser --------------------------------------------------------------- Parser/AMReX_Parser.cpp Parser/AMReX_Parser.H @@ -305,3 +307,8 @@ endif () if (AMReX_TINY_PROFILE) target_sources(amrex PRIVATE AMReX_TinyProfiler.cpp AMReX_TinyProfiler.H ) endif () + +# MPMD +if (AMReX_MPI) + target_sources(amrex PRIVATE AMReX_MPMD.cpp AMReX_MPMD.H ) +endif () diff --git a/Src/Base/Make.package b/Src/Base/Make.package index d7c4e520e7b..9dd615b3251 100644 --- a/Src/Base/Make.package +++ b/Src/Base/Make.package @@ -22,6 +22,7 @@ C$(AMREX_BASE)_sources += AMReX_BlockMutex.cpp C$(AMREX_BASE)_sources += AMReX_ParmParse.cpp AMReX_parmparse_fi.cpp AMReX_Utility.cpp C$(AMREX_BASE)_headers += AMReX_ParmParse.H AMReX_Utility.H AMReX_BLassert.H AMReX_ArrayLim.H C$(AMREX_BASE)_headers += AMReX_Functional.H AMReX_Reduce.H AMReX_Scan.H AMReX_Partition.H +C$(AMREX_BASE)_headers += AMReX_ValLocPair.H C$(AMREX_BASE)_headers += AMReX_FileSystem.H C$(AMREX_BASE)_sources += AMReX_FileSystem.cpp @@ -100,6 +101,7 @@ C$(AMREX_BASE)_headers += AMReX_MFParallelForC.H C$(AMREX_BASE)_headers += AMReX_MFParallelForG.H C$(AMREX_BASE)_headers += AMReX_TagParallelFor.H +C$(AMREX_BASE)_headers += AMReX_CTOParallelForImpl.H C$(AMREX_BASE)_headers += AMReX_ParReduce.H @@ -203,7 +205,7 @@ C$(AMREX_BASE)_headers += AMReX_FEIntegrator.H C$(AMREX_BASE)_headers += AMReX_IntegratorBase.H C$(AMREX_BASE)_headers += AMReX_RKIntegrator.H C$(AMREX_BASE)_headers += AMReX_TimeIntegrator.H - +C$(AMREX_BASE)_headers += AMReX_RungeKutta.H # # Fortran interface routines. 
@@ -271,6 +273,10 @@ CEXE_sources += AMReX_Machine.cpp # Forward declaration CEXE_headers += AMReX_BaseFwd.H +ifeq ($(USE_MPI),TRUE) + CEXE_headers += AMReX_MPMD.H + CEXE_sources += AMReX_MPMD.cpp +endif VPATH_LOCATIONS += $(AMREX_HOME)/Src/Base INCLUDE_LOCATIONS += $(AMREX_HOME)/Src/Base diff --git a/Src/Boundary/AMReX_LOUtil_K.H b/Src/Boundary/AMReX_LOUtil_K.H index b8fdb2a37ce..71bb1dd41d1 100644 --- a/Src/Boundary/AMReX_LOUtil_K.H +++ b/Src/Boundary/AMReX_LOUtil_K.H @@ -34,6 +34,22 @@ void poly_interp_coeff (Real xInt, Real const* AMREX_RESTRICT x, int N, Real* AM } } +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void poly_interp_coeff (Real xInt, Real const* AMREX_RESTRICT x, Real* AMREX_RESTRICT c) noexcept +{ + for (int j = 0; j < N; ++j) { + Real num = 1.0, den = 1.0; + for (int i = 0; i < N; ++i) { + if (i != j) { + num *= xInt-x[i]; + den *= x[j]-x[i]; + } + } + c[j] = num / den; + } +} + } #endif diff --git a/Src/EB/AMReX_EB2.H b/Src/EB/AMReX_EB2.H index ad56d532520..def8d2de9e0 100644 --- a/Src/EB/AMReX_EB2.H +++ b/Src/EB/AMReX_EB2.H @@ -49,6 +49,7 @@ public: virtual const Level& getLevel (const Geometry & geom) const = 0; virtual const Geometry& getGeometry (const Box& domain) const = 0; virtual const Box& coarsestDomain () const = 0; + virtual void addFineLevels (int num_new_fine_levels) = 0; protected: static AMREX_EXPORT Vector > m_instance; @@ -66,7 +67,7 @@ public: IndexSpaceImp (const G& gshop, const Geometry& geom, int required_coarsening_level, int max_coarsening_level, int ngrow, bool build_coarse_level_by_coarsening, - bool extend_domain_face); + bool extend_domain_face, int num_coarsen_opt); IndexSpaceImp (IndexSpaceImp const&) = delete; IndexSpaceImp (IndexSpaceImp &&) = delete; @@ -80,46 +81,67 @@ public: virtual const Box& coarsestDomain () const final { return m_geom.back().Domain(); } + virtual void addFineLevels (int num_new_fine_levels) final; using F = typename G::FunctionType; private: + G m_gshop; + bool m_build_coarse_level_by_coarsening; + bool m_extend_domain_face; + int m_num_coarsen_opt; + Vector > m_gslevel; Vector m_geom; Vector m_domain; Vector m_ngrow; - std::unique_ptr m_impfunc; }; #include bool ExtendDomainFace (); +int NumCoarsenOpt (); template void Build (const G& gshop, const Geometry& geom, int required_coarsening_level, int max_coarsening_level, int ngrow = 4, bool build_coarse_level_by_coarsening = true, - bool extend_domain_face = ExtendDomainFace()) + bool extend_domain_face = ExtendDomainFace(), + int num_coarsen_opt = NumCoarsenOpt()) { BL_PROFILE("EB2::Initialize()"); IndexSpace::push(new IndexSpaceImp(gshop, geom, required_coarsening_level, max_coarsening_level, ngrow, build_coarse_level_by_coarsening, - extend_domain_face)); + extend_domain_face, + num_coarsen_opt)); } void Build (const Geometry& geom, int required_coarsening_level, int max_coarsening_level, int ngrow = 4, - bool build_coarse_level_by_coarsening = true); + bool build_coarse_level_by_coarsening = true, + bool extend_domain_face = ExtendDomainFace(), + int num_coarsen_opt = NumCoarsenOpt()); + + +void BuildFromChkptFile (std::string const& fname, + const Geometry& geom, + int required_coarsening_level, + int max_coarsening_level, + int ngrow = 4, + bool build_coarse_level_by_coarsening = true, + bool extend_domain_face = ExtendDomainFace()); int maxCoarseningLevel (const Geometry& geom); int maxCoarseningLevel (IndexSpace const* ebis, const Geometry& geom); +void addFineLevels (int num_new_fine_levels); + }} #endif diff --git a/Src/EB/AMReX_EB2.cpp 
b/Src/EB/AMReX_EB2.cpp index 3bdf44ee4e9..fc2d75e0a01 100644 --- a/Src/EB/AMReX_EB2.cpp +++ b/Src/EB/AMReX_EB2.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -21,12 +22,14 @@ AMREX_EXPORT Vector > IndexSpace::m_instance; AMREX_EXPORT int max_grid_size = 64; AMREX_EXPORT bool extend_domain_face = true; +AMREX_EXPORT int num_coarsen_opt = 0; void Initialize () { ParmParse pp("eb2"); pp.queryAdd("max_grid_size", max_grid_size); pp.queryAdd("extend_domain_face", extend_domain_face); + pp.queryAdd("num_coarsen_opt", num_coarsen_opt); amrex::ExecOnFinalize(Finalize); } @@ -41,6 +44,11 @@ bool ExtendDomainFace () return extend_domain_face; } +int NumCoarsenOpt () +{ + return num_coarsen_opt; +} + void IndexSpace::push (IndexSpace* ispace) { @@ -74,7 +82,8 @@ const IndexSpace* TopIndexSpaceIfPresent() noexcept { void Build (const Geometry& geom, int required_coarsening_level, - int max_coarsening_level, int ngrow, bool build_coarse_level_by_coarsening) + int max_coarsening_level, int ngrow, bool build_coarse_level_by_coarsening, + bool a_extend_domain_face, int a_num_coarsen_opt) { ParmParse pp("eb2"); std::string geom_type; @@ -85,7 +94,8 @@ Build (const Geometry& geom, int required_coarsening_level, EB2::AllRegularIF rif; EB2::GeometryShop gshop(rif); EB2::Build(gshop, geom, required_coarsening_level, - max_coarsening_level, ngrow, build_coarse_level_by_coarsening); + max_coarsening_level, ngrow, build_coarse_level_by_coarsening, + a_extend_domain_face, a_num_coarsen_opt); } else if (geom_type == "box") { @@ -102,7 +112,8 @@ Build (const Geometry& geom, int required_coarsening_level, EB2::GeometryShop gshop(bf); EB2::Build(gshop, geom, required_coarsening_level, - max_coarsening_level, ngrow, build_coarse_level_by_coarsening); + max_coarsening_level, ngrow, build_coarse_level_by_coarsening, + a_extend_domain_face, a_num_coarsen_opt); } else if (geom_type == "cylinder") { @@ -127,7 +138,8 @@ Build (const Geometry& geom, int required_coarsening_level, EB2::GeometryShop gshop(cf); EB2::Build(gshop, geom, required_coarsening_level, - max_coarsening_level, ngrow, build_coarse_level_by_coarsening); + max_coarsening_level, ngrow, build_coarse_level_by_coarsening, + a_extend_domain_face, a_num_coarsen_opt); } else if (geom_type == "plane") { @@ -141,7 +153,8 @@ Build (const Geometry& geom, int required_coarsening_level, EB2::GeometryShop gshop(pf); EB2::Build(gshop, geom, required_coarsening_level, - max_coarsening_level, ngrow, build_coarse_level_by_coarsening); + max_coarsening_level, ngrow, build_coarse_level_by_coarsening, + a_extend_domain_face, a_num_coarsen_opt); } else if (geom_type == "sphere") { @@ -158,7 +171,8 @@ Build (const Geometry& geom, int required_coarsening_level, EB2::GeometryShop gshop(sf); EB2::Build(gshop, geom, required_coarsening_level, - max_coarsening_level, ngrow, build_coarse_level_by_coarsening); + max_coarsening_level, ngrow, build_coarse_level_by_coarsening, + a_extend_domain_face, a_num_coarsen_opt); } else if (geom_type == "torus") { @@ -177,7 +191,8 @@ Build (const Geometry& geom, int required_coarsening_level, EB2::GeometryShop gshop(sf); EB2::Build(gshop, geom, required_coarsening_level, - max_coarsening_level, ngrow, build_coarse_level_by_coarsening); + max_coarsening_level, ngrow, build_coarse_level_by_coarsening, + a_extend_domain_face, a_num_coarsen_opt); } else if (geom_type == "parser") { @@ -188,7 +203,8 @@ Build (const Geometry& geom, int required_coarsening_level, EB2::ParserIF pif(parser.compile<3>()); 
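Every `geom_type` branch of `EB2::Build` in the hunk that follows forwards the two new trailing arguments, so the runtime defaults reach `IndexSpaceImp` unchanged. From an application's point of view the new knob can be set either in an inputs file (`eb2.num_coarsen_opt = 2`, picked up by `Initialize()`) or passed explicitly. A hedged usage sketch, assuming an initialized AMReX application in which `geom` is the finest-level `Geometry`; the sphere radius and center are illustrative values only:

    #include <AMReX_EB2.H>
    #include <AMReX_EB2_IF_Sphere.H>

    void build_eb (amrex::Geometry const& geom)
    {
        // Solid sphere (fluid outside); values are made up for illustration.
        amrex::EB2::SphereIF sphere(0.25, {AMREX_D_DECL(0.5, 0.5, 0.5)}, false);
        auto gshop = amrex::EB2::makeShop(sphere);
        amrex::EB2::Build(gshop, geom,
                          0,     // required_coarsening_level
                          4,     // max_coarsening_level
                          4,     // ngrow
                          true,  // build_coarse_level_by_coarsening
                          amrex::EB2::ExtendDomainFace(),  // keep the global default
                          2);    // num_coarsen_opt: start the box search 4x coarser
    }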
EB2::GeometryShop gshop(pif,parser); EB2::Build(gshop, geom, required_coarsening_level, - max_coarsening_level, ngrow, build_coarse_level_by_coarsening); + max_coarsening_level, ngrow, build_coarse_level_by_coarsening, + a_extend_domain_face, a_num_coarsen_opt); } else if (geom_type == "stl") { @@ -206,7 +222,8 @@ Build (const Geometry& geom, int required_coarsening_level, geom, required_coarsening_level, max_coarsening_level, ngrow, build_coarse_level_by_coarsening, - extend_domain_face)); + a_extend_domain_face, + a_num_coarsen_opt)); } else { @@ -214,6 +231,29 @@ Build (const Geometry& geom, int required_coarsening_level, } } +void addFineLevels (int num_new_fine_levels) +{ + BL_PROFILE("EB2::addFineLevels()"); + auto p = const_cast(TopIndexSpace()); + if (p) { + p->addFineLevels(num_new_fine_levels); + } +} + +void +BuildFromChkptFile (std::string const& fname, + const Geometry& geom, int required_coarsening_level, + int max_coarsening_level, int ngrow, bool build_coarse_level_by_coarsening, + bool a_extend_domain_face) +{ + ChkptFile chkpt_file(fname); + IndexSpace::push(new IndexSpaceChkptFile(chkpt_file, + geom, required_coarsening_level, + max_coarsening_level, ngrow, + build_coarse_level_by_coarsening, + a_extend_domain_face)); +} + namespace { static int comp_max_crse_level (Box cdomain, const Box& domain) { diff --git a/Src/EB/AMReX_EB2_2D_C.cpp b/Src/EB/AMReX_EB2_2D_C.cpp index bf17844658c..060ed8f4df4 100644 --- a/Src/EB/AMReX_EB2_2D_C.cpp +++ b/Src/EB/AMReX_EB2_2D_C.cpp @@ -391,6 +391,13 @@ void build_cells (Box const& bx, Array4 const& cell, }); } + set_connection_flags(bxg1, cell, fx, fy); +} + +void set_connection_flags (Box const& bxg1, + Array4 const& cell, + Array4 const& fx, Array4 const& fy) noexcept +{ // Build neighbors. By default, all neighbors are already set. AMREX_HOST_DEVICE_FOR_3D ( bxg1, i, j, k, { diff --git a/Src/EB/AMReX_EB2_3D_C.H b/Src/EB/AMReX_EB2_3D_C.H index 14543f81d25..3ea77f149fe 100644 --- a/Src/EB/AMReX_EB2_3D_C.H +++ b/Src/EB/AMReX_EB2_3D_C.H @@ -200,11 +200,8 @@ int check_mvmc (int i, int j, int k, Array4 const& fine) nxm = 0; } else if (n == 2) { nxm = 1; - } else if (n == 4) { - ierr = 1; } else { ierr = 1; - amrex::Abort("amrex::check_mvmc: how did this happen? wrong number of cuts on xlo-face"); } int nxp = -1; @@ -213,11 +210,8 @@ int check_mvmc (int i, int j, int k, Array4 const& fine) nxp = 0; } else if (n == 2) { nxp = 1; - } else if (n == 4) { - ierr = 1; } else { ierr = 1; - amrex::Abort("amrex::check_mvmc: how did this happen? wrong number of cuts on xhi-face"); } // y-faces @@ -227,11 +221,8 @@ int check_mvmc (int i, int j, int k, Array4 const& fine) nym = 0; } else if (n == 2) { nym = 1; - } else if (n == 4) { - ierr = 1; } else { ierr = 1; - amrex::Abort("amrex::check_mvmc: how did this happen? wrong number of cuts on ylo-face"); } int nyp = -1; @@ -240,11 +231,8 @@ int check_mvmc (int i, int j, int k, Array4 const& fine) nyp = 0; } else if (n == 2) { nyp = 1; - } else if (n == 4) { - ierr = 1; } else { ierr = 1; - amrex::Abort("amrex::check_mvmc: how did this happen? wrong number of cuts on yhi-face"); } // z-faces @@ -254,11 +242,8 @@ int check_mvmc (int i, int j, int k, Array4 const& fine) nzm = 0; } else if (n == 2) { nzm = 1; - } else if (n == 4) { - ierr = 1; } else { ierr = 1; - amrex::Abort("amrex::check_mvmc: how did this happen? 
wrong number of cuts on zlo-face"); } int nzp = -1; @@ -267,11 +252,8 @@ int check_mvmc (int i, int j, int k, Array4 const& fine) nzp = 0; } else if (n == 2) { nzp = 1; - } else if (n == 4) { - ierr = 1; } else { ierr = 1; - amrex::Abort("amrex::check_mvmc: how did this happen? wrong number of cuts on zhi-face"); } if (nxm == 1 && nym == 1 && nzm == 1 && nxp == 1 && nyp == 1 && nzp == 1) { diff --git a/Src/EB/AMReX_EB2_3D_C.cpp b/Src/EB/AMReX_EB2_3D_C.cpp index 0077d817ae4..767626eb9e9 100644 --- a/Src/EB/AMReX_EB2_3D_C.cpp +++ b/Src/EB/AMReX_EB2_3D_C.cpp @@ -853,89 +853,96 @@ void build_cells (Box const& bx, Array4 const& cell, nsmallcells += hp[0]; nmulticuts += hp[1]; + Box const& nbxg1 = amrex::surroundingNodes(bxg1); + Box const& bxg1x = amrex::surroundingNodes(bxg1,0); + Box const& bxg1y = amrex::surroundingNodes(bxg1,1); + Box const& bxg1z = amrex::surroundingNodes(bxg1,2); + AMREX_HOST_DEVICE_FOR_3D(nbxg1, i, j, k, + { + if (levset(i,j,k) < Real(0.0)) { + bool zero_levset = false; + if (bxg1.contains(i-1,j-1,k-1) + && cell(i-1,j-1,k-1).isCovered()) { + zero_levset = true; + } else if (bxg1.contains(i ,j-1,k-1) + && cell(i ,j-1,k-1).isCovered()) { + zero_levset = true; + } else if (bxg1.contains(i-1,j ,k-1) + && cell(i-1,j ,k-1).isCovered()) { + zero_levset = true; + } else if (bxg1.contains(i ,j ,k-1) + && cell(i ,j ,k-1).isCovered()) { + zero_levset = true; + } else if (bxg1.contains(i-1,j-1,k ) + && cell(i-1,j-1,k ).isCovered()) { + zero_levset = true; + } else if (bxg1.contains(i ,j-1,k ) + && cell(i ,j-1,k ).isCovered()) { + zero_levset = true; + } else if (bxg1.contains(i-1,j ,k ) + && cell(i-1,j ,k ).isCovered()) { + zero_levset = true; + } else if (bxg1.contains(i ,j ,k ) + && cell(i ,j ,k ).isCovered()) { + zero_levset = true; + } else if (bxg1x.contains(i ,j-1,k-1) + && fx(i ,j-1,k-1) == Type::covered) { + zero_levset = true; + } else if (bxg1x.contains(i ,j ,k-1) + && fx(i ,j ,k-1) == Type::covered) { + zero_levset = true; + } else if (bxg1x.contains(i ,j-1,k ) + && fx(i ,j-1,k ) == Type::covered) { + zero_levset = true; + } else if (bxg1x.contains(i ,j ,k ) + && fx(i ,j ,k ) == Type::covered) { + zero_levset = true; + } else if (bxg1y.contains(i-1,j ,k-1) + && fy(i-1,j ,k-1) == Type::covered) { + zero_levset = true; + } else if (bxg1y.contains(i ,j ,k-1) + && fy(i ,j ,k-1) == Type::covered) { + zero_levset = true; + } else if (bxg1y.contains(i-1,j ,k ) + && fy(i-1,j ,k ) == Type::covered) { + zero_levset = true; + } else if (bxg1y.contains(i ,j ,k ) + && fy(i ,j ,k ) == Type::covered) { + zero_levset = true; + } else if (bxg1z.contains(i-1,j-1,k ) + && fz(i-1,j-1,k ) == Type::covered) { + zero_levset = true; + } else if (bxg1z.contains(i ,j-1,k ) + && fz(i ,j-1,k ) == Type::covered) { + zero_levset = true; + } else if (bxg1z.contains(i-1,j ,k ) + && fz(i-1,j ,k ) == Type::covered) { + zero_levset = true; + } else if (bxg1z.contains(i ,j ,k ) + && fz(i ,j ,k ) == Type::covered) { + zero_levset = true; + } + if (zero_levset) { + levset(i,j,k) = Real(0.0); + } + } + }); + if (nsmallcells > 0 || nmulticuts > 0) { if (!cover_multiple_cuts && nmulticuts > 0) { amrex::Abort("amrex::EB2::build_cells: multi-cuts not supported"); } - Box const& nbxg1 = amrex::surroundingNodes(bxg1); - Box const& bxg1x = amrex::surroundingNodes(bxg1,0); - Box const& bxg1y = amrex::surroundingNodes(bxg1,1); - Box const& bxg1z = amrex::surroundingNodes(bxg1,2); - AMREX_HOST_DEVICE_FOR_3D(nbxg1, i, j, k, - { - if (levset(i,j,k) < Real(0.0)) { - bool zero_levset = false; - if 
(bxg1.contains(i-1,j-1,k-1) - && cell(i-1,j-1,k-1).isCovered()) { - zero_levset = true; - } else if (bxg1.contains(i ,j-1,k-1) - && cell(i ,j-1,k-1).isCovered()) { - zero_levset = true; - } else if (bxg1.contains(i-1,j ,k-1) - && cell(i-1,j ,k-1).isCovered()) { - zero_levset = true; - } else if (bxg1.contains(i ,j ,k-1) - && cell(i ,j ,k-1).isCovered()) { - zero_levset = true; - } else if (bxg1.contains(i-1,j-1,k ) - && cell(i-1,j-1,k ).isCovered()) { - zero_levset = true; - } else if (bxg1.contains(i ,j-1,k ) - && cell(i ,j-1,k ).isCovered()) { - zero_levset = true; - } else if (bxg1.contains(i-1,j ,k ) - && cell(i-1,j ,k ).isCovered()) { - zero_levset = true; - } else if (bxg1.contains(i ,j ,k ) - && cell(i ,j ,k ).isCovered()) { - zero_levset = true; - } else if (cover_multiple_cuts) { - if (bxg1x.contains(i ,j-1,k-1) - && fx(i ,j-1,k-1) == Type::covered) { - zero_levset = true; - } else if (bxg1x.contains(i ,j ,k-1) - && fx(i ,j ,k-1) == Type::covered) { - zero_levset = true; - } else if (bxg1x.contains(i ,j-1,k ) - && fx(i ,j-1,k ) == Type::covered) { - zero_levset = true; - } else if (bxg1x.contains(i ,j ,k ) - && fx(i ,j ,k ) == Type::covered) { - zero_levset = true; - } else if (bxg1y.contains(i-1,j ,k-1) - && fy(i-1,j ,k-1) == Type::covered) { - zero_levset = true; - } else if (bxg1y.contains(i ,j ,k-1) - && fy(i ,j ,k-1) == Type::covered) { - zero_levset = true; - } else if (bxg1y.contains(i-1,j ,k ) - && fy(i-1,j ,k ) == Type::covered) { - zero_levset = true; - } else if (bxg1y.contains(i ,j ,k ) - && fy(i ,j ,k ) == Type::covered) { - zero_levset = true; - } else if (bxg1z.contains(i-1,j-1,k ) - && fz(i-1,j-1,k ) == Type::covered) { - zero_levset = true; - } else if (bxg1z.contains(i ,j-1,k ) - && fz(i ,j-1,k ) == Type::covered) { - zero_levset = true; - } else if (bxg1z.contains(i-1,j ,k ) - && fz(i-1,j ,k ) == Type::covered) { - zero_levset = true; - } else if (bxg1z.contains(i ,j ,k ) - && fz(i ,j ,k ) == Type::covered) { - zero_levset = true; - } - } - if (zero_levset) { - levset(i,j,k) = Real(0.0); - } - } - }); return; + } else { + set_connection_flags(bx, bxg1, cell, ctmp, fx, fy, fz); } +} +void set_connection_flags (Box const& bx, + Box const& bxg1, Array4 const& cell, + Array4 const& ctmp, Array4 const& fx, + Array4 const& fy, Array4 const& fz) noexcept +{ // Build neighbors. By default all 26 neighbors are already set. 
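Two things happen in this 3-D refactor: the loop that snaps the nodal level set to zero next to covered cells and faces now runs unconditionally, before the small-cell/multi-cut early return and no longer gated on `cover_multiple_cuts`; and the neighbor-connection pass is factored out into the free function `set_connection_flags` (mirroring the 2-D split above) so the new checkpoint-file level can reuse it. The companion change to `check_mvmc` replaces `Abort()` calls in device-callable code with an error flag that the caller accumulates and acts on from the host. A minimal sketch of that flag-and-reduce pattern, with a toy stand-in for the per-face cut count:

    #include <cstdio>
    #include <cstdlib>

    // Device-callable helpers should not Abort(); they return an error
    // flag instead (cf. check_mvmc returning ierr above). A face with
    // 0 or 2 cut edges is fine; anything else is a multi-cut error.
    int check_face_cuts (int ncuts)
    {
        return (ncuts == 0 || ncuts == 2) ? 0 : 1;
    }

    int main ()
    {
        int cuts_per_face[] = {0, 2, 2, 4, 0, 2};
        int nmulticuts = 0;
        for (int n : cuts_per_face) { nmulticuts += check_face_cuts(n); } // reduction
        const bool cover_multiple_cuts = false;   // runtime option, as above
        if (nmulticuts > 0 && !cover_multiple_cuts) {
            std::fprintf(stderr, "build_cells: multi-cuts not supported\n");
            return EXIT_FAILURE;
        }
        std::printf("%d multi-cut face(s) flagged for covering\n", nmulticuts);
        return 0;
    }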
AMREX_HOST_DEVICE_FOR_3D ( bxg1, i, j, k, { diff --git a/Src/EB/AMReX_EB2_C.H b/Src/EB/AMReX_EB2_C.H index 7e752f3d051..0be84fdc913 100644 --- a/Src/EB/AMReX_EB2_C.H +++ b/Src/EB/AMReX_EB2_C.H @@ -36,6 +36,9 @@ void build_cells (Box const& bx, Array4 const& cell, Real small_volfrac, Geometry const& geom, bool extend_domain_face, int& nsmallcells, int const nmulticuts) noexcept; +void set_connection_flags(Box const& bxg1, Array4 const& cell, + Array4 const& fx, Array4 const& fy) noexcept; + #elif (AMREX_SPACEDIM == 3) int build_faces (Box const& bx, Array4 const& cell, @@ -67,6 +70,11 @@ void build_cells (Box const& bx, Array4 const& cell, bool extend_domain_face, bool cover_multiple_cuts, int& nsmallcells, int& nmulticuts) noexcept; +void set_connection_flags(Box const& bx, Box const& bxg1, + Array4 const& cell, Array4 const& ctmp, + Array4 const& fx, Array4 const& fy, + Array4 const& fz) noexcept; + #endif void intercept_to_edge_centroid (AMREX_D_DECL(Array4 const& excent, diff --git a/Src/EB/AMReX_EB2_GeometryShop.H b/Src/EB/AMReX_EB2_GeometryShop.H index ff80dd20593..2a7565abad2 100644 --- a/Src/EB/AMReX_EB2_GeometryShop.H +++ b/Src/EB/AMReX_EB2_GeometryShop.H @@ -244,6 +244,7 @@ public: } } } + amrex::ignore_unused(nzero); if (nbody == 0) { return allregular; diff --git a/Src/EB/AMReX_EB2_IndexSpaceI.H b/Src/EB/AMReX_EB2_IndexSpaceI.H index 192df9f43a0..e7db810b03b 100644 --- a/Src/EB/AMReX_EB2_IndexSpaceI.H +++ b/Src/EB/AMReX_EB2_IndexSpaceI.H @@ -4,7 +4,11 @@ IndexSpaceImp::IndexSpaceImp (const G& gshop, const Geometry& geom, int required_coarsening_level, int max_coarsening_level, int ngrow, bool build_coarse_level_by_coarsening, - bool extend_domain_face) + bool extend_domain_face, int num_coarsen_opt) + : m_gshop(gshop), + m_build_coarse_level_by_coarsening(build_coarse_level_by_coarsening), + m_extend_domain_face(extend_domain_face), + m_num_coarsen_opt(num_coarsen_opt) { // build finest level (i.e., level 0) first AMREX_ALWAYS_ASSERT(required_coarsening_level >= 0 && required_coarsening_level <= 30); @@ -20,7 +24,8 @@ IndexSpaceImp::IndexSpaceImp (const G& gshop, const Geometry& geom, m_domain.push_back(geom.Domain()); m_ngrow.push_back(ngrow_finest); m_gslevel.reserve(max_coarsening_level+1); - m_gslevel.emplace_back(this, gshop, geom, EB2::max_grid_size, ngrow_finest, extend_domain_face); + m_gslevel.emplace_back(this, gshop, geom, EB2::max_grid_size, ngrow_finest, extend_domain_face, + num_coarsen_opt); for (int ilev = 1; ilev <= max_coarsening_level; ++ilev) { @@ -44,7 +49,8 @@ IndexSpaceImp::IndexSpaceImp (const G& gshop, const Geometry& geom, if (build_coarse_level_by_coarsening) { amrex::Abort("Failed to build required coarse EB level "+std::to_string(ilev)); } else { - m_gslevel.emplace_back(this, gshop, cgeom, EB2::max_grid_size, ng, extend_domain_face); + m_gslevel.emplace_back(this, gshop, cgeom, EB2::max_grid_size, ng, extend_domain_face, + num_coarsen_opt-ilev); } } else { break; @@ -54,8 +60,6 @@ IndexSpaceImp::IndexSpaceImp (const G& gshop, const Geometry& geom, m_domain.push_back(cdomain); m_ngrow.push_back(ng); } - - m_impfunc = std::make_unique(gshop.GetImpFunc()); } @@ -76,3 +80,29 @@ IndexSpaceImp::getGeometry (const Box& dom) const int i = std::distance(m_domain.begin(), it); return m_geom[i]; } + +template +void +IndexSpaceImp::addFineLevels (int num_new_fine_levels) +{ + if (num_new_fine_levels <= 0) { return; } + + if (m_num_coarsen_opt > 0) { + m_num_coarsen_opt += num_new_fine_levels; + } + + IndexSpaceImp fine_isp(m_gshop, amrex::refine(m_geom[0], 1< + 
+#include +#include + +#include + +namespace amrex { namespace EB2 { + +class IndexSpaceChkptFile + : public IndexSpace +{ +public: + + IndexSpaceChkptFile (const ChkptFile& chkptfile, + const Geometry& geom, int required_coarsening_level, + int max_coarsening_level, int ngrow, + bool build_coarse_level_by_coarsening, + bool extend_domain_face); + + IndexSpaceChkptFile (IndexSpaceChkptFile const&) = delete; + IndexSpaceChkptFile (IndexSpaceChkptFile &&) = delete; + void operator= (IndexSpaceChkptFile const&) = delete; + void operator= (IndexSpaceChkptFile &&) = delete; + + virtual ~IndexSpaceChkptFile () {} + + virtual const Level& getLevel (const Geometry& geom) const final; + virtual const Geometry& getGeometry (const Box& dom) const final; + virtual const Box& coarsestDomain () const final { + return m_geom.back().Domain(); + } + virtual void addFineLevels (int num_new_fine_levels) final; + +private: + + Vector m_chkpt_file_level; + Vector m_geom; + Vector m_domain; + Vector m_ngrow; +}; + +}} + +#endif diff --git a/Src/EB/AMReX_EB2_IndexSpace_chkpt_file.cpp b/Src/EB/AMReX_EB2_IndexSpace_chkpt_file.cpp new file mode 100644 index 00000000000..b0318dd402c --- /dev/null +++ b/Src/EB/AMReX_EB2_IndexSpace_chkpt_file.cpp @@ -0,0 +1,86 @@ +#include + +namespace amrex { namespace EB2 { + +IndexSpaceChkptFile::IndexSpaceChkptFile (const ChkptFile& chkpt_file, + const Geometry& geom, int required_coarsening_level, + int max_coarsening_level, int ngrow, + bool build_coarse_level_by_coarsening, + bool extend_domain_face) +{ + Gpu::LaunchSafeGuard lsg(true); // Always use GPU + + // build finest level (i.e., level 0) first + AMREX_ALWAYS_ASSERT(required_coarsening_level >= 0 && required_coarsening_level <= 30); + max_coarsening_level = std::max(required_coarsening_level,max_coarsening_level); + max_coarsening_level = std::min(30,max_coarsening_level); + + int ngrow_finest = std::max(ngrow,0); + for (int i = 1; i <= required_coarsening_level; ++i) { + ngrow_finest *= 2; + } + + m_geom.push_back(geom); + m_domain.push_back(geom.Domain()); + m_ngrow.push_back(ngrow_finest); + m_chkpt_file_level.reserve(max_coarsening_level+1); + m_chkpt_file_level.emplace_back(this, chkpt_file, geom, EB2::max_grid_size, ngrow_finest, + extend_domain_face); + + for (int ilev = 1; ilev <= max_coarsening_level; ++ilev) + { + bool coarsenable = m_geom.back().Domain().coarsenable(2,2); + if (!coarsenable) { + if (ilev <= required_coarsening_level) { + amrex::Abort("IndexSpaceImp: domain is not coarsenable at level "+std::to_string(ilev)); + } else { + break; + } + } + + int ng = (ilev > required_coarsening_level) ? 0 : m_ngrow.back()/2; + + Box cdomain = amrex::coarsen(m_geom.back().Domain(),2); + Geometry cgeom = amrex::coarsen(m_geom.back(),2); + m_chkpt_file_level.emplace_back(this, ilev, EB2::max_grid_size, ng, cgeom, m_chkpt_file_level[ilev-1]); + if (!m_chkpt_file_level.back().isOK()) { + m_chkpt_file_level.pop_back(); + if (ilev <= required_coarsening_level) { + if (build_coarse_level_by_coarsening) { + amrex::Abort("Failed to build required coarse EB level "+std::to_string(ilev)); + } else { + amrex::Abort("Chkptfile only stored for finest level. 
Failed to build "+std::to_string(ilev)); + } + } else { + break; + } + } + m_geom.push_back(cgeom); + m_domain.push_back(cdomain); + m_ngrow.push_back(ng); + } +} + +const Level& +IndexSpaceChkptFile::getLevel (const Geometry& geom) const +{ + auto it = std::find(std::begin(m_domain), std::end(m_domain), geom.Domain()); + int i = std::distance(m_domain.begin(), it); + return m_chkpt_file_level[i]; +} + +const Geometry& +IndexSpaceChkptFile::getGeometry (const Box& dom) const +{ + auto it = std::find(std::begin(m_domain), std::end(m_domain), dom); + int i = std::distance(m_domain.begin(), it); + return m_geom[i]; +} + +void +IndexSpaceChkptFile::addFineLevels (int /*num_new_fine_levels*/) +{ + amrex::Abort("IndexSpaceChkptFile::addFineLevels: not supported"); +} + +}} diff --git a/Src/EB/AMReX_EB2_Level.H b/Src/EB/AMReX_EB2_Level.H index d47917328c5..8ebc864b903 100644 --- a/Src/EB/AMReX_EB2_Level.H +++ b/Src/EB/AMReX_EB2_Level.H @@ -60,6 +60,8 @@ public: const Geometry& Geom () const noexcept { return m_geom; } IndexSpace const* getEBIndexSpace () const noexcept { return m_parent; } + void write_to_chkpt_file (const std::string& fname, bool extend_domain_face, int max_grid_size) const; + protected: Level (Level && rhs) = default; @@ -98,12 +100,13 @@ class GShopLevel : public Level { public: - GShopLevel (IndexSpace const* is, G const& gshop, const Geometry& geom, int max_grid_size, int ngrow, bool extend_domain_face); + GShopLevel (IndexSpace const* is, G const& gshop, const Geometry& geom, int max_grid_size, + int ngrow, bool extend_domain_face, int num_crse_opt); GShopLevel (IndexSpace const* is, int ilev, int max_grid_size, int ngrow, const Geometry& geom, GShopLevel& fineLevel); GShopLevel (IndexSpace const* is, const Geometry& geom); void define_fine (G const& gshop, const Geometry& geom, - int max_grid_size, int ngrow, bool extend_domain_face); + int max_grid_size, int ngrow, bool extend_domain_face, int num_crse_opt); }; template @@ -113,7 +116,7 @@ GShopLevel::GShopLevel (IndexSpace const* is, const Geometry& geom) template GShopLevel::GShopLevel (IndexSpace const* is, G const& gshop, const Geometry& geom, - int max_grid_size, int ngrow, bool extend_domain_face) + int max_grid_size, int ngrow, bool extend_domain_face, int num_crse_opt) : Level(is, geom) { if (std::is_same::value) { @@ -122,13 +125,13 @@ GShopLevel::GShopLevel (IndexSpace const* is, G const& gshop, const Geometry& return; } - define_fine(gshop, geom, max_grid_size, ngrow, extend_domain_face); + define_fine(gshop, geom, max_grid_size, ngrow, extend_domain_face, num_crse_opt); } template void GShopLevel::define_fine (G const& gshop, const Geometry& geom, - int max_grid_size, int ngrow, bool extend_domain_face) + int max_grid_size, int ngrow, bool extend_domain_face, int num_crse_opt) { if (amrex::Verbose() > 0 && extend_domain_face == false) { amrex::Print() << "AMReX WARNING: extend_domain_face=false is not recommended!\n"; @@ -166,57 +169,84 @@ GShopLevel::define_fine (G const& gshop, const Geometry& geom, Box bounding_box = (extend_domain_face) ? 
domain : domain_grown; bounding_box.surroundingNodes(); - BoxList bl(domain); - bl.maxSize(max_grid_size); - if (m_ngrow != 0) { - const IntVect& domlo = domain.smallEnd(); - const IntVect& domhi = domain.bigEnd(); - for (auto& b : bl) { - for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { - if (m_ngrow[idim] != 0) { - if (b.smallEnd(idim) == domlo[idim]) { - b.growLo(idim,m_ngrow[idim]); - } - if (b.bigEnd(idim) == domhi[idim]) { - b.growHi(idim,m_ngrow[idim]); - } + BoxList cut_boxes; + BoxList covered_boxes; + + const int nprocs = ParallelDescriptor::NProcs(); + const int iproc = ParallelDescriptor::MyProc(); + + num_crse_opt = std::max(0,std::min(8,num_crse_opt)); + for (int clev = num_crse_opt; clev >= 0; --clev) { + IntVect crse_ratio(1 << clev); + if (domain.coarsenable(crse_ratio)) { + Box const& crse_bounding_box = amrex::coarsen(bounding_box, crse_ratio); + Geometry const& crse_geom = amrex::coarsen(geom, crse_ratio); + BoxList test_boxes; + if (cut_boxes.isEmpty()) { + covered_boxes.clear(); + test_boxes = BoxList(crse_geom.Domain()); + test_boxes.maxSize(max_grid_size); + } else { + test_boxes.swap(cut_boxes); + test_boxes.coarsen(crse_ratio); + test_boxes.maxSize(max_grid_size); + } + + const Long nboxes = test_boxes.size(); + const auto& boxes = test_boxes.data(); + for (Long i = iproc; i < nboxes; i += nprocs) { + const Box& vbx = boxes[i]; + const Box& gbx = amrex::surroundingNodes(amrex::grow(vbx,1)); + auto box_type = gshop.getBoxType(gbx&crse_bounding_box,crse_geom,RunOn::Gpu); + if (box_type == gshop.allcovered) { + covered_boxes.push_back(amrex::refine(vbx, crse_ratio)); + } else if (box_type == gshop.mixedcells) { + cut_boxes.push_back(amrex::refine(vbx, crse_ratio)); } } + + amrex::AllGatherBoxes(cut_boxes.data()); } } - m_grids.define(std::move(bl)); - m_dmap.define(m_grids); - - Vector cut_boxes; - Vector covered_boxes; + amrex::AllGatherBoxes(covered_boxes.data()); - for (MFIter mfi(m_grids, m_dmap); mfi.isValid(); ++mfi) - { - const Box& vbx = mfi.validbox(); - const Box& gbx = amrex::surroundingNodes(amrex::grow(vbx,1)); - int box_type = gshop.getBoxType(gbx & bounding_box, geom, RunOn::Gpu); - if (box_type == gshop.allcovered) { - covered_boxes.push_back(vbx); - } else if (box_type == gshop.mixedcells) { - cut_boxes.push_back(vbx); - } + if (m_ngrow != 0) { + auto grow_at_domain_boundary = [&] (BoxList& bl) + { + const IntVect& domlo = domain.smallEnd(); + const IntVect& domhi = domain.bigEnd(); + for (auto& b : bl) { + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { + if (m_ngrow[idim] != 0) { + if (b.smallEnd(idim) == domlo[idim]) { + b.growLo(idim,m_ngrow[idim]); + } + if (b.bigEnd(idim) == domhi[idim]) { + b.growHi(idim,m_ngrow[idim]); + } + } + } + } + }; + grow_at_domain_boundary(covered_boxes); + grow_at_domain_boundary(cut_boxes); } - amrex::AllGatherBoxes(cut_boxes); - amrex::AllGatherBoxes(covered_boxes); - - if ( cut_boxes.empty() && - !covered_boxes.empty()) + if ( cut_boxes.isEmpty() && + !covered_boxes.isEmpty()) { amrex::Abort("AMReX_EB2_Level.H: Domain is completely covered"); } - if (!covered_boxes.empty()) { - m_covered_grids = BoxArray(BoxList(std::move(covered_boxes))); + if (!covered_boxes.isEmpty()) { + if (num_crse_opt > 2) { // don't want the box too big + covered_boxes.maxSize(max_grid_size*4); + } + m_covered_grids = BoxArray(std::move(covered_boxes)); } - if (cut_boxes.empty()) { + if (cut_boxes.isEmpty()) { m_grids = BoxArray(); m_dmap = DistributionMapping(); m_allregular = true; @@ -224,7 +254,7 @@ GShopLevel::define_fine (G 
const& gshop, const Geometry& geom, return; } - m_grids = BoxArray(BoxList(std::move(cut_boxes))); + m_grids = BoxArray(std::move(cut_boxes)); m_dmap = DistributionMapping(m_grids); m_mgf.define(m_grids, m_dmap); diff --git a/Src/EB/AMReX_EB2_Level.cpp b/Src/EB/AMReX_EB2_Level.cpp index 46277b59ab1..09b6db4a54c 100644 --- a/Src/EB/AMReX_EB2_Level.cpp +++ b/Src/EB/AMReX_EB2_Level.cpp @@ -1,6 +1,7 @@ #include #include +#include #include namespace amrex { namespace EB2 { @@ -916,4 +917,14 @@ Level::fillLevelSet (MultiFab& levelset, const Geometry& geom) const } } +void +Level::write_to_chkpt_file (const std::string& fname, bool extend_domain_face, int max_grid_size) const +{ + ChkptFile chkptFile(fname); + chkptFile.write_to_chkpt_file(m_grids, m_covered_grids, + m_volfrac, m_centroid, m_bndryarea, m_bndrycent, + m_bndrynorm, m_areafrac, m_facecent, m_edgecent, m_levelset, + m_geom, m_ngrow, extend_domain_face, max_grid_size); +} + }} diff --git a/Src/EB/AMReX_EB2_Level_STL.H b/Src/EB/AMReX_EB2_Level_STL.H index f29460d7a92..19cb31ef93b 100644 --- a/Src/EB/AMReX_EB2_Level_STL.H +++ b/Src/EB/AMReX_EB2_Level_STL.H @@ -13,7 +13,7 @@ class STLLevel public: STLLevel (IndexSpace const* is, STLtools const& stl_tools, const Geometry& geom, - int max_grid_size, int ngrow, bool extend_domain_face); + int max_grid_size, int ngrow, bool extend_domain_face, int num_crse_opt); STLLevel (IndexSpace const* is, int ilev, int max_grid_size, int ngrow, const Geometry& geom, STLLevel& fineLevel); diff --git a/Src/EB/AMReX_EB2_Level_STL.cpp b/Src/EB/AMReX_EB2_Level_STL.cpp index 00f29958714..53243cd754a 100644 --- a/Src/EB/AMReX_EB2_Level_STL.cpp +++ b/Src/EB/AMReX_EB2_Level_STL.cpp @@ -3,12 +3,12 @@ namespace amrex { namespace EB2 { STLLevel::STLLevel (IndexSpace const* is, STLtools const& stl_tools, const Geometry& geom, - int max_grid_size, int ngrow, bool extend_domain_face) + int max_grid_size, int ngrow, bool extend_domain_face, int num_crse_opt) : GShopLevel(is, geom) { BL_PROFILE("EB2::STLLevel()-fine"); - define_fine(stl_tools, geom, max_grid_size, ngrow, extend_domain_face); + define_fine(stl_tools, geom, max_grid_size, ngrow, extend_domain_face, num_crse_opt); } STLLevel::STLLevel (IndexSpace const* is, int ilev, int max_grid_size, int ngrow, diff --git a/Src/EB/AMReX_EB2_Level_chkpt_file.H b/Src/EB/AMReX_EB2_Level_chkpt_file.H new file mode 100644 index 00000000000..881dd8f22f0 --- /dev/null +++ b/Src/EB/AMReX_EB2_Level_chkpt_file.H @@ -0,0 +1,31 @@ +#ifndef AMREX_EB2_LEVEL_CHKPT_FILE_H_ +#define AMREX_EB2_LEVEL_CHKPT_FILE_H_ +#include + +#include +#include + +namespace amrex { namespace EB2 { + +class ChkptFileLevel + : public GShopLevel +{ +public: + + ChkptFileLevel (IndexSpace const* is, ChkptFile const& chkpt_file, const Geometry& geom, + int max_grid_size, int ngrow, bool extend_domain_face); + + ChkptFileLevel (IndexSpace const* is, int ilev, int max_grid_size, int ngrow, + const Geometry& geom, ChkptFileLevel& fineLevel); + +// for cuda support + void define_fine_chkpt_file (ChkptFile const& chkpt_file, + Geometry const& geom, int max_grid_size, int ngrow, + bool extend_domain_face); + + void finalize_cell_flags (); //sets the connection flags and adjustments to cellflags +}; + +}} + +#endif diff --git a/Src/EB/AMReX_EB2_Level_chkpt_file.cpp b/Src/EB/AMReX_EB2_Level_chkpt_file.cpp new file mode 100644 index 00000000000..0b2d88e828f --- /dev/null +++ b/Src/EB/AMReX_EB2_Level_chkpt_file.cpp @@ -0,0 +1,203 @@ +#include +#include + +#include + +namespace amrex { namespace EB2 { + 
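The rewritten `GShopLevel::define_fine` above is the heart of the `num_crse_opt` optimization: rather than classifying every finest-level box against the implicit function, it starts on a domain coarsened by up to 2^num_crse_opt, keeps the boxes found fully covered there, and re-examines only the cut boxes on each finer pass (the work is also round-robined across MPI ranks and merged with `AllGatherBoxes`). A standalone sketch of the search in one dimension, with half-open integer intervals standing in for boxes and a half-space standing in for the geometry:

    #include <cstdio>
    #include <utility>
    #include <vector>

    enum class BoxType { covered, regular, mixed };

    constexpr double body_hi = 0.37;   // toy geometry: solid occupies [0, 0.37)

    BoxType classify (double lo, double hi)
    {
        if (hi <= body_hi) { return BoxType::covered; }
        if (lo >= body_hi) { return BoxType::regular; }
        return BoxType::mixed;
    }

    int main ()
    {
        const int num_crse_opt = 3;    // first pass is 2^3 = 8x coarser
        const int nfine = 64;          // fine cells covering [0,1)
        std::vector<std::pair<int,int>> cut_boxes, covered_boxes; // fine indices

        for (int clev = num_crse_opt; clev >= 0; --clev) {
            const int r = 1 << clev;   // coarsening ratio of this pass
            std::vector<std::pair<int,int>> test;  // coarse index space
            if (cut_boxes.empty()) {   // first pass: tile the whole domain
                covered_boxes.clear();
                for (int i = 0; i < nfine/r; ++i) { test.push_back({i, i+1}); }
            } else {                   // later passes: revisit cut boxes only
                for (auto const& b : cut_boxes) {
                    for (int i = b.first/r; i < b.second/r; ++i) {
                        test.push_back({i, i+1});  // the maxSize() analogue
                    }
                }
                cut_boxes.clear();
            }
            for (auto const& b : test) {
                BoxType t = classify(double(b.first)*r/nfine,
                                     double(b.second)*r/nfine);
                if (t == BoxType::covered) {       // refine back to fine indices
                    covered_boxes.push_back({b.first*r, b.second*r});
                } else if (t == BoxType::mixed) {
                    cut_boxes.push_back({b.first*r, b.second*r});
                }   // regular boxes drop out entirely
            }
        }
        std::printf("cut: %zu box(es), covered: %zu box(es)\n",
                    cut_boxes.size(), covered_boxes.size());
        return 0;
    }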
+ChkptFileLevel::ChkptFileLevel (IndexSpace const* is, ChkptFile const& chkpt_file, + Geometry const& geom, int max_grid_size, int ngrow, bool extend_domain_face) + : GShopLevel(is, geom) +{ + BL_PROFILE("EB2::ChkptFileLevel()-fine"); + + define_fine_chkpt_file(chkpt_file, geom, max_grid_size, ngrow, extend_domain_face); +} + +void +ChkptFileLevel::define_fine_chkpt_file (ChkptFile const& chkpt_file, + Geometry const& geom, int max_grid_size, + int ngrow, bool extend_domain_face) +{ + BL_PROFILE("EB2::ChkptFileLevel()-define-fine-chkptfile"); + + m_ngrow = IntVect{static_cast(std::ceil(ngrow/16.)) * 16}; + + Box const& domain = geom.Domain(); + Box domain_grown = domain; + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { + if (geom.isPeriodic(idim)) { + m_ngrow[idim] = 0; + } else { + m_ngrow[idim] = std::min(m_ngrow[idim], domain_grown.length(idim)); + } + } + + const int ng = GFab::ng; + chkpt_file.read_from_chkpt_file(m_grids, m_covered_grids, + m_dmap, m_volfrac, m_centroid, m_bndryarea, + m_bndrycent, m_bndrynorm, m_areafrac, m_facecent, + m_edgecent, m_levelset, ng, geom, m_ngrow, + extend_domain_face, max_grid_size); + + + if ( m_grids.empty() && + !m_covered_grids.empty()) + { + Abort("AMReX_EB2_Level.H: Domain is completely covered"); + } + + if (m_grids.empty()) { + m_allregular = true; + m_ok = true; + return; + } + + + m_mgf.define(m_grids, m_dmap); + MFInfo mf_info; + m_cellflag.define(m_grids, m_dmap, 1, ng, mf_info); + +#ifdef AMREX_USE_OMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for (MFIter mfi(m_mgf); mfi.isValid(); ++mfi) + { + auto& gfab = m_mgf[mfi]; + + const auto& levelset = m_levelset.const_array(mfi); + const Box& bxg2 = amrex::grow(gfab.validbox(),ng); + const Box& nodal_box = amrex::surroundingNodes(bxg2); + const auto& ls = gfab.getLevelSet().array(); + + AMREX_HOST_DEVICE_PARALLEL_FOR_3D(nodal_box, i, j, k, + { + ls(i,j,k) = levelset(i,j,k); + }); + + auto& cellflag = m_cellflag[mfi]; + gfab.buildTypes(cellflag); + } + + finalize_cell_flags(); +} + +void +ChkptFileLevel::finalize_cell_flags () +{ + +#ifdef AMREX_USE_OMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + { + EBCellFlagFab cellflagtmp; + for (MFIter mfi(m_mgf); mfi.isValid(); ++mfi) + { + auto& gfab = m_mgf[mfi]; + const Box& vbx = mfi.validbox(); + const Box& bxg1 = amrex::grow(vbx,1); + Array4 const& cell = m_cellflag.array(mfi); + + cellflagtmp.resize(m_cellflag[mfi].box()); + Elixir cellflagtmp_eli = cellflagtmp.elixir(); + Array4 const& ctmp = cellflagtmp.array(); + + auto& facetype = gfab.getFaceType(); + AMREX_D_TERM(Array4 const& fx = facetype[0].array();, + Array4 const& fy = facetype[1].array();, + Array4 const& fz = facetype[2].array();); + + + AMREX_D_TERM(Array4 const& apx = m_areafrac[0].const_array(mfi);, + Array4 const& apy = m_areafrac[1].const_array(mfi);, + Array4 const& apz = m_areafrac[2].const_array(mfi);); + + const Box& xbx = amrex::grow(amrex::surroundingNodes(vbx,0),1); + AMREX_HOST_DEVICE_FOR_3D ( xbx, i, j, k, + { + if (apx(i,j,k) == 0.0_rt) { + fx(i,j,k) = Type::covered; + } else if (apx(i,j,k) == 1.0_rt) { + fx(i,j,k) = Type::regular; + } + }); + + const Box& ybx = amrex::grow(amrex::surroundingNodes(vbx,1),1); + AMREX_HOST_DEVICE_FOR_3D ( ybx, i, j, k, + { + if (apy(i,j,k) == 0.0_rt) { + fy(i,j,k) = Type::covered; + } else if (apy(i,j,k) == 1.0_rt) { + fy(i,j,k) = Type::regular; + } + }); + + #if (AMREX_SPACEDIM == 3) + const Box& zbx = amrex::grow(amrex::surroundingNodes(vbx,2),1); + AMREX_HOST_DEVICE_FOR_3D ( zbx, i, j, k, + { + 
if (apz(i,j,k) == 0.0_rt) { + fz(i,j,k) = Type::covered; + } else if (apz(i,j,k) == 1.0_rt) { + fz(i,j,k) = Type::regular; + } + }); + #endif + + + #if (AMREX_SPACEDIM == 2) + ignore_unused(ctmp); + AMREX_HOST_DEVICE_FOR_3D ( bxg1, i, j, k, + { + ignore_unused(k); + if (cell(i,j,0).isSingleValued()) { + if (fx(i,j,0) == Type::regular && fx(i+1,j,0) == Type::regular && + fy(i,j,0) == Type::regular && fy(i,j+1,0) == Type::regular) + { + cell(i,j,0).setRegular(); + } + else if (fx(i,j,0) == Type::covered && fx(i+1,j,0) == Type::covered && + fy(i,j,0) == Type::covered && fy(i,j+1,0) == Type::covered) + { + cell(i,j,0).setCovered(); + } + } + }); + + set_connection_flags(bxg1, cell, fx, fy); + + #else + AMREX_HOST_DEVICE_FOR_3D ( bxg1, i, j, k, + { + if (cell(i,j,k).isSingleValued()) { + if (fx(i,j,k) == Type::covered && fx(i+1,j,k) == Type::covered && + fy(i,j,k) == Type::covered && fy(i,j+1,k) == Type::covered && + fz(i,j,k) == Type::covered && fz(i,j,k+1) == Type::covered) + { + cell(i,j,k).setCovered(); + } + else if (fx(i,j,k) == Type::regular && fx(i+1,j,k) == Type::regular && + fy(i,j,k) == Type::regular && fy(i,j+1,k) == Type::regular && + fz(i,j,k) == Type::regular && fz(i,j,k+1) == Type::regular) + { + cell(i,j,k).setRegular(); + } + } + }); + + set_connection_flags(vbx, bxg1, cell, ctmp, fx, fy, fz); + + #endif + + } + + m_ok = true; + } +} + +ChkptFileLevel::ChkptFileLevel (IndexSpace const* is, int ilev, int max_grid_size, int ngrow, + const Geometry& geom, ChkptFileLevel& fineLevel) +: GShopLevel(is, ilev, max_grid_size, ngrow, geom, fineLevel) +{} + +}} diff --git a/Src/EB/AMReX_EB_chkpt_file.H b/Src/EB/AMReX_EB_chkpt_file.H new file mode 100644 index 00000000000..781db55a1d8 --- /dev/null +++ b/Src/EB/AMReX_EB_chkpt_file.H @@ -0,0 +1,60 @@ +#ifndef AMREX_EB_CHKPT_FILE_H_ +#define AMREX_EB_CHKPT_FILE_H_ + +#include + +namespace amrex { namespace EB2 { + +class ChkptFile +{ +private: + std::string m_restart_file = ""; + + const std::string m_volfrac_name = "volfrac"; + const std::string m_centroid_name = "centroid"; + const std::string m_bndryarea_name = "bndryarea"; + const std::string m_bndrycent_name = "bndrycent"; + const std::string m_bndrynorm_name = "bndrynorm"; + const std::string m_levelset_name = "levelset"; + + const amrex::Vector m_areafrac_name + = {AMREX_D_DECL("areafrac_x", "areafrac_y", "areafrac_z")}; + const amrex::Vector m_facecent_name + = {AMREX_D_DECL("facecent_x", "facecent_y", "facecent_z")}; + const amrex::Vector m_edgecent_name + = {AMREX_D_DECL("edgecent_x", "edgecent_y", "edgecent_z")}; + + void writeHeader (const BoxArray& cut_ba, const BoxArray& covered_ba, const Geometry& geom, + const IntVect& ngrow, bool extend_domain_face, int max_grid_size) const; + + void writeToFile (const MultiFab& mf, const std::string& mf_name) const; + + +public: + ChkptFile (const std::string &fname); + + void read_from_chkpt_file (BoxArray& cut_grids, BoxArray& covered_grids, + DistributionMapping& dmap, + MultiFab& volfrac, MultiFab& centroid, MultiFab& bndryarea, + MultiFab& bndrycent, MultiFab& bndrynorm, + Array& areafrac, + Array& facecent, + Array& edgecent, + MultiFab& levelset, int ng_gfab, const Geometry& geom, + const IntVect& ngrow_finest, bool extend_domain_face, int max_grid_size) const; + + void write_to_chkpt_file (const BoxArray& cut_grids, + const BoxArray& covered_grids, + const MultiFab& volfrac, + const MultiFab& centroid, const MultiFab& bndryarea, + const MultiFab& bndrycent, const MultiFab& bndrynorm, + const Array& areafrac, + const Array& 
facecent, + const Array& edgecent, + const MultiFab& levelset, const Geometry& geom, + const IntVect& ngrow, bool extend_domain_face, int max_grid_size) const; +}; + +}} + +#endif diff --git a/Src/EB/AMReX_EB_chkpt_file.cpp b/Src/EB/AMReX_EB_chkpt_file.cpp new file mode 100644 index 00000000000..cd1c00e9ee5 --- /dev/null +++ b/Src/EB/AMReX_EB_chkpt_file.cpp @@ -0,0 +1,324 @@ +#include + +#include +#include +#include // amrex::VisMF::Write(MultiFab) +#include // amrex::[read,write]IntData(array_of_ints) + +namespace { + +const std::string level_prefix = "Level_"; + +void gotoNextLine (std::istream& is) +{ + constexpr std::streamsize bl_ignore_max { 100000 }; + is.ignore(bl_ignore_max, '\n'); +} + +} + +namespace amrex { namespace EB2 { + +// Header information includes the cut and covered boxes (if any) +// Checkpoint file contains data for cut boxes +void +ChkptFile::writeHeader (const BoxArray& cut_ba, const BoxArray& covered_ba, + const Geometry& geom, + const IntVect& ngrow, bool extend_domain_face, + int max_grid_size) const +{ + if (ParallelDescriptor::IOProcessor()) + { + std::string HeaderFileName(m_restart_file + "/Header"); + VisMF::IO_Buffer io_buffer(VisMF::IO_Buffer_Size); + std::ofstream HeaderFile; + + HeaderFile.rdbuf()->pubsetbuf(io_buffer.dataPtr(), io_buffer.size()); + + HeaderFile.open(HeaderFileName.c_str(), std::ofstream::out | + std::ofstream::trunc | + std::ofstream::binary); + + if ( ! HeaderFile.good() ) + FileOpenFailed(HeaderFileName); + + HeaderFile.precision(17); + + HeaderFile << "Checkpoint version: 1\n"; + + const int nlevels = 1; + HeaderFile << nlevels << "\n"; + + // Geometry + for (int i = 0; i < AMREX_SPACEDIM; ++i) + HeaderFile << geom.ProbLo(i) << ' '; + HeaderFile << '\n'; + + for (int i = 0; i < AMREX_SPACEDIM; ++i) + HeaderFile << geom.ProbHi(i) << ' '; + HeaderFile << '\n'; + + // ngrow + for (int i = 0; i < AMREX_SPACEDIM; ++i) + HeaderFile << ngrow[i] << ' '; + HeaderFile << '\n'; + + // extend domain face + HeaderFile << extend_domain_face << "\n"; + + // max grid size + HeaderFile << max_grid_size << "\n"; + + // BoxArray + for (int lev = 0; lev < nlevels; ++lev) + { + cut_ba.writeOn(HeaderFile); + HeaderFile << '\n'; + + if (! 
covered_ba.empty()) { + covered_ba.writeOn(HeaderFile); + HeaderFile << '\n'; + } + } + } +} + +void +ChkptFile::writeToFile (const MultiFab& mf, const std::string& mf_name) const +{ + VisMF::Write(mf, MultiFabFileFullPrefix(0, m_restart_file, + level_prefix, mf_name)); +} + + +ChkptFile::ChkptFile (const std::string &fname) + : m_restart_file(fname) +{} + +void +ChkptFile::read_from_chkpt_file (BoxArray& cut_grids, BoxArray& covered_grids, + DistributionMapping& dmap, + MultiFab& volfrac, MultiFab& centroid, + MultiFab& bndryarea, MultiFab& bndrycent, + MultiFab& bndrynorm, Array& areafrac, + Array& facecent, + Array& edgecent, + MultiFab& levelset, int ng_gfab, const Geometry& geom, + const IntVect& ngrow_finest, bool extend_domain_face, + int max_grid_size) const +{ + Real prob_lo[AMREX_SPACEDIM]; + Real prob_hi[AMREX_SPACEDIM]; + + std::string File(m_restart_file + "/Header"); + + if (amrex::Verbose()) amrex::Print() << "file=" << File << std::endl; + + VisMF::IO_Buffer io_buffer(VisMF::GetIOBufferSize()); + + Vector fileCharPtr; + ParallelDescriptor::ReadAndBcastFile(File, fileCharPtr); + std::string fileCharPtrString(fileCharPtr.dataPtr()); + std::istringstream is(fileCharPtrString, std::istringstream::in); + + std::string line, word; + + std::getline(is, line); + + int nlevs; + is >> nlevs; + gotoNextLine(is); + AMREX_ASSERT(nlevs == 1); + + std::getline(is, line); + { + std::istringstream lis(line); + int i = 0; + while (lis >> word) { + prob_lo[i++] = std::stod(word); + } + } + + std::getline(is, line); + { + std::istringstream lis(line); + int i = 0; + while (lis >> word) { + prob_hi[i++] = std::stod(word); + } + } + + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(Math::abs(prob_lo[idim] - geom.ProbLo()[idim]) < std::numeric_limits::epsilon(), + "EB2::ChkptFile cannot read from a different problem domain"); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(Math::abs(prob_hi[idim] - geom.ProbHi()[idim]) < std::numeric_limits::epsilon(), + "EB2::ChkptFile cannot read from a different problem domain"); + } + + IntVect ngrow_chkptfile; + std::getline(is, line); + { + std::istringstream lis(line); + int i = 0; + while (lis >> word) { + ngrow_chkptfile[i++] = std::stoi(word); + } + } + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(ngrow_chkptfile == ngrow_finest, "EB2::ChkptFile cannot read from different ngrow"); + + bool edf_chkptfile; + is >> edf_chkptfile; + gotoNextLine(is); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(extend_domain_face == edf_chkptfile, + "EB2::ChkptFile cannot read from different extend_domain_face"); + + int mgs_chkptfile; + is >> mgs_chkptfile; + gotoNextLine(is); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(max_grid_size == mgs_chkptfile, + "EB2::ChkptFile cannot read from different max_grid_size"); + + if (amrex::Verbose()) amrex::Print() << "Loading cut_grids\n"; + cut_grids.readFrom(is); + gotoNextLine(is); + + if (is.peek() != EOF) { + if (amrex::Verbose()) amrex::Print() << "Loading covered_grids\n"; + covered_grids.readFrom(is); + gotoNextLine(is); + } + + dmap.define(cut_grids, ParallelDescriptor::NProcs()); + + // volfrac + { + if (amrex::Verbose()) amrex::Print() << " Loading " << m_volfrac_name << std::endl; + + volfrac.define(cut_grids, dmap, 1, ng_gfab); + + auto prefix = MultiFabFileFullPrefix(0, m_restart_file, level_prefix, m_volfrac_name); + VisMF::Read(volfrac, prefix); + } + + // centroid + { + if (amrex::Verbose()) amrex::Print() << " Loading " << m_centroid_name << std::endl; + + centroid.define(cut_grids, dmap, AMREX_SPACEDIM, ng_gfab); + + 
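Each field block in `read_from_chkpt_file` follows the same define-then-read pattern: size the `MultiFab` from the `BoxArray` and `DistributionMapping` recovered from the header, then let `VisMF` fill it from the matching per-level file. A condensed sketch of one such block (the prefix helper and the "Level_"/"volfrac" names mirror the code above; error handling is omitted and the signature is illustrative):

    #include <string>
    #include <AMReX_MultiFab.H>
    #include <AMReX_PlotFileUtil.H>
    #include <AMReX_VisMF.H>

    // cut_grids, dmap, ng_gfab and restart_file play the same roles as in
    // the surrounding function; returns the volume-fraction field.
    amrex::MultiFab read_field (amrex::BoxArray const& cut_grids,
                                amrex::DistributionMapping const& dmap,
                                int ng_gfab, std::string const& restart_file)
    {
        amrex::MultiFab volfrac(cut_grids, dmap, 1, ng_gfab);  // define...
        amrex::VisMF::Read(volfrac,                            // ...then read
            amrex::MultiFabFileFullPrefix(0, restart_file, "Level_", "volfrac"));
        return volfrac;
    }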
auto prefix = MultiFabFileFullPrefix(0, m_restart_file, level_prefix, m_centroid_name); + VisMF::Read(centroid, prefix); + } + + // bndryarea + { + if (amrex::Verbose()) amrex::Print() << " Loading " << m_bndryarea_name << std::endl; + + bndryarea.define(cut_grids, dmap, 1, ng_gfab); + + auto prefix = MultiFabFileFullPrefix(0, m_restart_file, level_prefix, m_bndryarea_name); + VisMF::Read(bndryarea, prefix); + } + + // bndrycent + { + if (amrex::Verbose()) amrex::Print() << " Loading " << m_bndrycent_name << std::endl; + + bndrycent.define(cut_grids, dmap, AMREX_SPACEDIM, ng_gfab); + + auto prefix = MultiFabFileFullPrefix(0, m_restart_file, level_prefix, m_bndrycent_name); + VisMF::Read(bndrycent, prefix); + } + + // bndrynorm + { + if (amrex::Verbose()) amrex::Print() << " Loading " << m_bndrynorm_name << std::endl; + + bndrynorm.define(cut_grids, dmap, AMREX_SPACEDIM, ng_gfab); + + auto prefix = MultiFabFileFullPrefix(0, m_restart_file, level_prefix, m_bndrynorm_name); + VisMF::Read(bndrynorm, prefix); + } + + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { + // areafrac + { + if (amrex::Verbose()) amrex::Print() << " Loading " << m_areafrac_name[idim] << std::endl; + + areafrac[idim].define(convert(cut_grids, IntVect::TheDimensionVector(idim)), dmap, 1, ng_gfab); + + auto prefix = MultiFabFileFullPrefix(0, m_restart_file, level_prefix, m_areafrac_name[idim]); + VisMF::Read(areafrac[idim], prefix); + } + + // facecent + { + if (amrex::Verbose()) amrex::Print() << " Loading " << m_facecent_name[idim] << std::endl; + + facecent[idim].define(convert(cut_grids, IntVect::TheDimensionVector(idim)), dmap, AMREX_SPACEDIM-1, ng_gfab); + + auto prefix = MultiFabFileFullPrefix(0, m_restart_file, level_prefix, m_facecent_name[idim]); + VisMF::Read(facecent[idim], prefix); + } + + // edgecent + { + if (amrex::Verbose()) amrex::Print() << " Loading " << m_edgecent_name[idim] << std::endl; + + IntVect edge_type{1}; edge_type[idim] = 0; + edgecent[idim].define(convert(cut_grids, edge_type), dmap, 1, ng_gfab); + + auto prefix = MultiFabFileFullPrefix(0, m_restart_file, level_prefix, m_edgecent_name[idim]); + VisMF::Read(edgecent[idim], prefix); + } + } + + // levelset + { + if (amrex::Verbose()) amrex::Print() << " Loading " << m_levelset_name << std::endl; + + levelset.define(convert(cut_grids,IntVect::TheNodeVector()), dmap, 1, ng_gfab); + + auto prefix = MultiFabFileFullPrefix(0, m_restart_file, level_prefix, m_levelset_name); + VisMF::Read(levelset, prefix); + } +} + +void +ChkptFile::write_to_chkpt_file (const BoxArray& cut_grids, + const BoxArray& covered_grids, + const MultiFab& volfrac, + const MultiFab& centroid, const MultiFab& bndryarea, + const MultiFab& bndrycent, const MultiFab& bndrynorm, + const Array& areafrac, + const Array& facecent, + const Array& edgecent, + const MultiFab& levelset, const Geometry& geom, + const IntVect& ngrow, bool extend_domain_face, + int max_grid_size) const +{ + + if (ParallelDescriptor::IOProcessor()) { + std::cout << "\n\t Writing checkpoint " << m_restart_file << std::endl; + } + + const int nlevels = 1; + PreBuildDirectorHierarchy(m_restart_file, level_prefix, nlevels, true); + + writeHeader(cut_grids, covered_grids, geom, ngrow, extend_domain_face, max_grid_size); + + writeToFile(volfrac, m_volfrac_name); + writeToFile(centroid, m_centroid_name); + writeToFile(bndryarea, m_bndryarea_name); + writeToFile(bndrycent, m_bndrycent_name); + writeToFile(bndrynorm, m_bndrynorm_name); + writeToFile(levelset, m_levelset_name); + + for (int idim = 0; idim < 
AMREX_SPACEDIM; ++idim) { + writeToFile(areafrac[idim], m_areafrac_name[idim]); + writeToFile(facecent[idim], m_facecent_name[idim]); + writeToFile(edgecent[idim], m_edgecent_name[idim]); + } +} + +}} diff --git a/Src/EB/AMReX_distFcnElement.H b/Src/EB/AMReX_distFcnElement.H index f839bdb5747..2a9c7a0c2f4 100644 --- a/Src/EB/AMReX_distFcnElement.H +++ b/Src/EB/AMReX_distFcnElement.H @@ -12,7 +12,7 @@ class distFcnElement2d { public: //! Constructor distFcnElement2d() {} - ~distFcnElement2d() {} + virtual ~distFcnElement2d() {} virtual distFcnElement2d* newDistFcnElement2d() const = 0; @@ -29,7 +29,7 @@ class distFcnElement2d { class LineDistFcnElement2d: public distFcnElement2d { public: LineDistFcnElement2d() {} - ~LineDistFcnElement2d() {} + virtual ~LineDistFcnElement2d() {} virtual distFcnElement2d* newDistFcnElement2d() const override; @@ -58,7 +58,7 @@ class LineDistFcnElement2d: public distFcnElement2d { class SplineDistFcnElement2d: public distFcnElement2d { public: SplineDistFcnElement2d() {} - ~SplineDistFcnElement2d() {} + virtual ~SplineDistFcnElement2d() {} virtual distFcnElement2d* newDistFcnElement2d() const override; diff --git a/Src/EB/CMakeLists.txt b/Src/EB/CMakeLists.txt index 8ceb433e159..017e4d783a8 100644 --- a/Src/EB/CMakeLists.txt +++ b/Src/EB/CMakeLists.txt @@ -70,11 +70,17 @@ target_sources(amrex AMReX_EB2_${AMReX_SPACEDIM}D_C.H AMReX_EB_STL_utils.H AMReX_EB_STL_utils.cpp + AMReX_EB_chkpt_file.H + AMReX_EB_chkpt_file.cpp AMReX_EB_triGeomOps_K.H AMReX_EB2_Level_STL.H AMReX_EB2_Level_STL.cpp AMReX_EB2_IndexSpace_STL.H AMReX_EB2_IndexSpace_STL.cpp + AMReX_EB2_Level_chkpt_file.H + AMReX_EB2_Level_chkpt_file.cpp + AMReX_EB2_IndexSpace_chkpt_file.H + AMReX_EB2_IndexSpace_chkpt_file.cpp ) if (AMReX_SPACEDIM EQUAL 3) diff --git a/Src/EB/Make.package b/Src/EB/Make.package index 5865a2da982..b684523924f 100644 --- a/Src/EB/Make.package +++ b/Src/EB/Make.package @@ -79,6 +79,12 @@ CEXE_headers += AMReX_EB_triGeomOps_K.H CEXE_headers += AMReX_EB2_Level_STL.H AMReX_EB2_IndexSpace_STL.H CEXE_sources += AMReX_EB2_Level_STL.cpp AMReX_EB2_IndexSpace_STL.cpp +CEXE_sources += AMReX_EB_chkpt_file.cpp +CEXE_headers += AMReX_EB_chkpt_file.H + +CEXE_headers += AMReX_EB2_Level_chkpt_file.H AMReX_EB2_IndexSpace_chkpt_file.H +CEXE_sources += AMReX_EB2_Level_chkpt_file.cpp AMReX_EB2_IndexSpace_chkpt_file.cpp + ifeq ($(DIM),3) CEXE_sources += AMReX_WriteEBSurface.cpp AMReX_EBToPVD.cpp CEXE_headers += AMReX_WriteEBSurface.H AMReX_EBToPVD.H diff --git a/Src/Extern/HDF5/AMReX_PlotFileUtilHDF5.cpp b/Src/Extern/HDF5/AMReX_PlotFileUtilHDF5.cpp index 021ed8c4f60..49a761da801 100644 --- a/Src/Extern/HDF5/AMReX_PlotFileUtilHDF5.cpp +++ b/Src/Extern/HDF5/AMReX_PlotFileUtilHDF5.cpp @@ -232,11 +232,8 @@ WriteGenericPlotfileHeaderHDF5 (hid_t fid, int ratio = 1; if (ref_ratio.size() > 0) - ratio = ref_ratio[level][0]; + ratio = (level == finest_level)? 
1: ref_ratio[level][0]; - if (level == finest_level) { - ratio = 1; - } CreateWriteHDF5AttrInt(grp, "ref_ratio", 1, &ratio); for (int k = 0; k < AMREX_SPACEDIM; ++k) { diff --git a/Src/Extern/HYPRE/AMReX_HypreIJIface.H b/Src/Extern/HYPRE/AMReX_HypreIJIface.H index 6d0dbacd95f..2ac96748b24 100644 --- a/Src/Extern/HYPRE/AMReX_HypreIJIface.H +++ b/Src/Extern/HYPRE/AMReX_HypreIJIface.H @@ -93,11 +93,11 @@ private: HypreIntType (*m_precondSolvePtr)( HYPRE_Solver, HYPRE_ParCSRMatrix, HYPRE_ParVector, HYPRE_ParVector){nullptr}; - HypreIntType (*m_solverSetTolPtr)(HYPRE_Solver, double){nullptr}; - HypreIntType (*m_solverSetAbsTolPtr)(HYPRE_Solver, double){nullptr}; + HypreIntType (*m_solverSetTolPtr)(HYPRE_Solver, amrex::Real){nullptr}; + HypreIntType (*m_solverSetAbsTolPtr)(HYPRE_Solver, amrex::Real){nullptr}; HypreIntType (*m_solverSetMaxIterPtr)(HYPRE_Solver, HypreIntType){nullptr}; HypreIntType (*m_solverNumItersPtr)(HYPRE_Solver, HypreIntType*){nullptr}; - HypreIntType (*m_solverFinalResidualNormPtr)(HYPRE_Solver, double*){nullptr}; + HypreIntType (*m_solverFinalResidualNormPtr)(HYPRE_Solver, amrex::Real*){nullptr}; HypreIntType m_ilower{0}; HypreIntType m_iupper{0}; diff --git a/Src/Extern/HYPRE/AMReX_HypreIJIface.cpp b/Src/Extern/HYPRE/AMReX_HypreIJIface.cpp index 9e7a42dbb5b..c2e4f126252 100644 --- a/Src/Extern/HYPRE/AMReX_HypreIJIface.cpp +++ b/Src/Extern/HYPRE/AMReX_HypreIJIface.cpp @@ -275,7 +275,7 @@ void HypreIJIface::boomeramg_precond_configure (const std::string& prefix) if (hpp.pp.contains("bamg_non_galerkin_level_tols")) { std::vector levels; - std::vector tols; + std::vector tols; hpp.pp.getarr("bamg_non_galerkin_level_levels", levels); hpp.pp.getarr("bamg_non_galerkin_level_tols", tols); diff --git a/Src/Extern/PETSc/AMReX_PETSc.cpp b/Src/Extern/PETSc/AMReX_PETSc.cpp index bf0bf68a99c..7d8cd79b582 100644 --- a/Src/Extern/PETSc/AMReX_PETSc.cpp +++ b/Src/Extern/PETSc/AMReX_PETSc.cpp @@ -1,7 +1,4 @@ -#include -#include - #ifdef AMREX_USE_EB #include #include @@ -9,6 +6,9 @@ #include +#include +#include + #include #include #include diff --git a/Src/Extern/SENSEI/AMReX_AmrDataAdaptor.H b/Src/Extern/SENSEI/AMReX_AmrDataAdaptor.H index 061ff14c301..602a6298126 100644 --- a/Src/Extern/SENSEI/AMReX_AmrDataAdaptor.H +++ b/Src/Extern/SENSEI/AMReX_AmrDataAdaptor.H @@ -30,10 +30,10 @@ public: int GetNumberOfArrays(const std::string &meshName, int association, unsigned int &numberOfArrays) override; int GetArrayName(const std::string &meshName, int association, unsigned int index, std::string &arrayName) override; #endif - int GetMesh(const std::string &meshName, bool structureOnly, vtkDataObject *&mesh) override; - int AddGhostNodesArray(vtkDataObject* mesh, const std::string &meshName) override; - int AddGhostCellsArray(vtkDataObject* mesh, const std::string &meshName) override; - int AddArray(vtkDataObject* mesh, const std::string &meshName, int association, const std::string &arrayName) override; + int GetMesh(const std::string &meshName, bool structureOnly, svtkDataObject *&mesh) override; + int AddGhostNodesArray(svtkDataObject* mesh, const std::string &meshName) override; + int AddGhostCellsArray(svtkDataObject* mesh, const std::string &meshName) override; + int AddArray(svtkDataObject* mesh, const std::string &meshName, int association, const std::string &arrayName) override; int ReleaseData() override; protected: diff --git a/Src/Extern/SENSEI/AMReX_AmrDataAdaptor.cpp b/Src/Extern/SENSEI/AMReX_AmrDataAdaptor.cpp index 135c21ef0e2..aa801eb0993 100644 --- 
a/Src/Extern/SENSEI/AMReX_AmrDataAdaptor.cpp +++ b/Src/Extern/SENSEI/AMReX_AmrDataAdaptor.cpp @@ -1,22 +1,22 @@ #include "AMReX_AmrDataAdaptor.H" +#include "senseiConfig.h" #include "MPIUtils.h" #include "STLUtils.h" -#include "VTKUtils.h" +#include "SVTKUtils.h" #include "Profiler.h" #include "Error.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include @@ -62,15 +62,15 @@ int DescriptorMap::Initialize(const DescriptorList &descriptors) if (itype.cellCentered()) { - this->Map[vtkDataObject::CELL][arrayName] = std::make_pair(i,j); + this->Map[svtkDataObject::CELL][arrayName] = std::make_pair(i,j); } else if (itype.nodeCentered()) { - this->Map[vtkDataObject::POINT][arrayName] = std::make_pair(i,j); + this->Map[svtkDataObject::POINT][arrayName] = std::make_pair(i,j); } else { - this->Map[vtkDataObject::FIELD][arrayName] = std::make_pair(i,j); + this->Map[svtkDataObject::FIELD][arrayName] = std::make_pair(i,j); } } } @@ -156,7 +156,7 @@ struct AmrDataAdaptor::InternalsType int PinMesh; amrex::InSituUtils::DescriptorMap SimMetadata; #if SENSEI_VERSION_MAJOR < 3 - std::vector ManagedObjects; + std::vector ManagedObjects; #endif std::vector> Masks; }; @@ -225,11 +225,11 @@ int AmrDataAdaptor::GetMeshMetadata(unsigned int id, metadata->GlobalView = true; metadata->MeshName = "mesh"; - metadata->MeshType = VTK_OVERLAPPING_AMR; - metadata->BlockType = VTK_UNIFORM_GRID; + metadata->MeshType = SVTK_OVERLAPPING_AMR; + metadata->BlockType = SVTK_UNIFORM_GRID; metadata->NumBlocks = 0; metadata->NumBlocksLocal = {-1}; - metadata->CoordinateType = InSituUtils::amrex_tt::vtk_type_enum(); + metadata->CoordinateType = InSituUtils::amrex_tt::svtk_type_enum(); metadata->StaticMesh = 0; // TODO @@ -318,14 +318,14 @@ int AmrDataAdaptor::GetMeshMetadata(unsigned int id, std::string arrayName = desc.name(j); metadata->ArrayName.push_back(arrayName); metadata->ArrayComponents.push_back(1); - metadata->ArrayType.push_back(InSituUtils::amrex_tt::vtk_type_enum()); + metadata->ArrayType.push_back(InSituUtils::amrex_tt::svtk_type_enum()); if (itype.cellCentered()) - metadata->ArrayCentering.push_back(vtkDataObject::CELL); + metadata->ArrayCentering.push_back(svtkDataObject::CELL); else if (itype.nodeCentered()) - metadata->ArrayCentering.push_back(vtkDataObject::POINT); + metadata->ArrayCentering.push_back(svtkDataObject::POINT); else - metadata->ArrayCentering.push_back(vtkDataObject::FIELD); + metadata->ArrayCentering.push_back(svtkDataObject::FIELD); } } @@ -557,8 +557,8 @@ int AmrDataAdaptor::GetNumberOfArrays(const std::string &meshName, return -1; } - if ((association != vtkDataObject::POINT) && - (association != vtkDataObject::CELL)) + if ((association != svtkDataObject::POINT) && + (association != svtkDataObject::CELL)) { SENSEI_ERROR("Invalid association " << association) return -1; @@ -590,7 +590,7 @@ int AmrDataAdaptor::GetArrayName(const std::string &meshName, if (this->Internals->SimMetadata.GetName(association, index, arrayName)) { SENSEI_ERROR("No array named \"" << arrayName << "\" in " - << sensei::VTKUtils::GetAttributesName(association) + << sensei::SVTKUtils::GetAttributesName(association) << " data") return -1; } @@ -603,7 +603,7 @@ int AmrDataAdaptor::GetArrayName(const std::string &meshName, //----------------------------------------------------------------------------- int AmrDataAdaptor::GetMesh(const std::string 
&meshName, - bool structureOnly, vtkDataObject *&mesh) + bool structureOnly, svtkDataObject *&mesh) { amrex::ignore_unused(structureOnly); @@ -626,8 +626,8 @@ int AmrDataAdaptor::GetMesh(const std::string &meshName, unsigned int nLevels = InSituUtils::NumActiveLevels(levels); - // initialize new vtk datasets - vtkOverlappingAMR *amrMesh = vtkOverlappingAMR::New(); + // initialize new svtk datasets + svtkOverlappingAMR *amrMesh = svtkOverlappingAMR::New(); #if SENSEI_VERSION_MAJOR < 3 Internals->ManagedObjects.push_back(amrMesh); #endif @@ -685,12 +685,12 @@ int AmrDataAdaptor::GetMesh(const std::string &meshName, int cboxLo[3] = {AMREX_ARLIM(cbox.loVect())}; int cboxHi[3] = {AMREX_ARLIM(cbox.hiVect())}; - // vtk's representation of box metadata - vtkAMRBox block(cboxLo, cboxHi); + // svtk's representation of box metadata + svtkAMRBox block(cboxLo, cboxHi); amrMesh->SetAMRBox(i, j, block); amrMesh->SetAMRBlockSourceIndex(i, j, gid++); - // skip building a vtk amrMesh for the non local boxes + // skip building a svtk amrMesh for the non local boxes if (dmap[j] != rank) continue; @@ -705,14 +705,14 @@ int AmrDataAdaptor::GetMesh(const std::string &meshName, int nboxLo[3] = {AMREX_ARLIM(nbox.loVect())}; int nboxHi[3] = {AMREX_ARLIM(nbox.hiVect())}; - // new vtk uniform amrMesh, node centered - vtkUniformGrid *ug = vtkUniformGrid::New(); + // new svtk uniform amrMesh, node centered + svtkUniformGrid *ug = svtkUniformGrid::New(); ug->SetOrigin(origin); ug->SetSpacing(spacing); ug->SetExtent(nboxLo[0], nboxHi[0], nboxLo[1], nboxHi[1], nboxLo[2], nboxHi[2]); - // pass the block into vtk + // pass the block into svtk amrMesh->SetDataSet(i, j, ug); ug->Delete(); } @@ -722,7 +722,7 @@ int AmrDataAdaptor::GetMesh(const std::string &meshName, } //----------------------------------------------------------------------------- -int AmrDataAdaptor::AddGhostCellsArray(vtkDataObject* mesh, +int AmrDataAdaptor::AddGhostCellsArray(svtkDataObject* mesh, const std::string &meshName) { sensei::TimeEvent<64> event("AmrDataAdaptor::AddGhostCellsArray"); @@ -733,7 +733,7 @@ int AmrDataAdaptor::AddGhostCellsArray(vtkDataObject* mesh, return -1; } - vtkOverlappingAMR *amrMesh = dynamic_cast(mesh); + svtkOverlappingAMR *amrMesh = dynamic_cast(mesh); if (!amrMesh) { SENSEI_ERROR("Invalid mesh type " @@ -780,7 +780,7 @@ int AmrDataAdaptor::AddGhostCellsArray(vtkDataObject* mesh, if (dMap[j] != rank) continue; - vtkUniformGrid *blockMesh = amrMesh->GetDataSet(i, j); + svtkUniformGrid *blockMesh = amrMesh->GetDataSet(i, j); if (!blockMesh) { @@ -790,24 +790,24 @@ int AmrDataAdaptor::AddGhostCellsArray(vtkDataObject* mesh, long nCells = blockMesh->GetNumberOfCells(); - // transfer mask array into vtk - vtkUnsignedCharArray *ga = vtkUnsignedCharArray::New(); - ga->SetName("vtkGhostType"); + // transfer mask array into svtk + svtkUnsignedCharArray *ga = svtkUnsignedCharArray::New(); + ga->SetName("svtkGhostType"); ga->SetArray(mask[j], nCells, 0); blockMesh->GetCellData()->AddArray(ga); ga->Delete(); // for debug can visualize the ghost cells // FIXME -- a bug in Catalyst ignores internal ghost zones - // when using the VTK writrer. Until that bug gets fixed, one + // when using the SVTK writrer. 
Until that bug gets fixed, one // can manually inject this copy using a PV Python filter - ga = vtkUnsignedCharArray::New(); + ga = svtkUnsignedCharArray::New(); ga->SetName("GhostType"); ga->SetArray(mask[j], nCells, 1); blockMesh->GetCellData()->AddArray(ga); ga->Delete(); - // because VTK takes ownership + // because SVTK takes ownership mask[j] = nullptr; } } @@ -816,7 +816,7 @@ int AmrDataAdaptor::AddGhostCellsArray(vtkDataObject* mesh, } //----------------------------------------------------------------------------- -int AmrDataAdaptor::AddGhostNodesArray(vtkDataObject *mesh, +int AmrDataAdaptor::AddGhostNodesArray(svtkDataObject *mesh, const std::string &meshName) { amrex::ignore_unused(mesh); @@ -834,7 +834,7 @@ int AmrDataAdaptor::AddGhostNodesArray(vtkDataObject *mesh, } //----------------------------------------------------------------------------- -int AmrDataAdaptor::AddArray(vtkDataObject* mesh, const std::string &meshName, +int AmrDataAdaptor::AddArray(svtkDataObject* mesh, const std::string &meshName, int association, const std::string &arrayName) { sensei::TimeEvent<64> event("AmrDataAdaptor::AddArray"); @@ -848,7 +848,7 @@ int AmrDataAdaptor::AddArray(vtkDataObject* mesh, const std::string &meshName, return -1; } - vtkOverlappingAMR *amrMesh = dynamic_cast(mesh); + svtkOverlappingAMR *amrMesh = dynamic_cast(mesh); if (!amrMesh) { SENSEI_ERROR("Invalid mesh type " @@ -861,8 +861,8 @@ int AmrDataAdaptor::AddArray(vtkDataObject* mesh, const std::string &meshName, return -1; } - if ((association != vtkDataObject::CELL) && - (association != vtkDataObject::POINT)) + if ((association != svtkDataObject::CELL) && + (association != svtkDataObject::POINT)) { SENSEI_ERROR("Invalid association " << association) return -1; @@ -878,7 +878,7 @@ int AmrDataAdaptor::AddArray(vtkDataObject* mesh, const std::string &meshName, if (this->Internals->SimMetadata.GetIndex(arrayName, association, fab, comp)) { SENSEI_ERROR("Failed to locate descriptor for " - << sensei::VTKUtils::GetAttributesName(association) + << sensei::SVTKUtils::GetAttributesName(association) << " data array \"" << arrayName << "\"") return -1; } @@ -894,8 +894,8 @@ int AmrDataAdaptor::AddArray(vtkDataObject* mesh, const std::string &meshName, amrex::MultiFab& state = levels[i]->get_new_data(fab); unsigned int ng = state.nGrow(); - if (!((association == vtkDataObject::CELL) && state.is_cell_centered()) && - !((association == vtkDataObject::POINT) && state.is_nodal())) + if (!((association == svtkDataObject::CELL) && state.is_cell_centered()) && + !((association == svtkDataObject::POINT) && state.is_nodal())) { SENSEI_ERROR("association does not match MultiFAB centering") return -1; @@ -926,7 +926,7 @@ int AmrDataAdaptor::AddArray(vtkDataObject* mesh, const std::string &meshName, int cboxLo[3] = {AMREX_ARLIM(cbox.loVect())}; int cboxHi[3] = {AMREX_ARLIM(cbox.hiVect())}; - // skip building a vtk mesh for the non local boxes + // skip building a svtk mesh for the non local boxes if (dmap[j] != rank) continue; @@ -938,7 +938,7 @@ int AmrDataAdaptor::AddArray(vtkDataObject* mesh, const std::string &meshName, int nboxHi[3] = {AMREX_ARLIM(nbox.hiVect())}; // get the block mesh - vtkUniformGrid *ug = amrMesh->GetDataSet(i, j); + svtkUniformGrid *ug = amrMesh->GetDataSet(i, j); // node centered size long nlen = 1; @@ -953,9 +953,9 @@ int AmrDataAdaptor::AddArray(vtkDataObject* mesh, const std::string &meshName, // pointer to the data amrex_real *pcd = state[j].dataPtr(comp); - // allocate vtk array - InSituUtils::amrex_tt::vtk_type 
*da = - InSituUtils::amrex_tt::vtk_type::New(); + // allocate svtk array + InSituUtils::amrex_tt::svtk_type *da = + InSituUtils::amrex_tt::svtk_type::New(); // set component name da->SetName(arrayName.c_str()); @@ -981,7 +981,7 @@ int AmrDataAdaptor::AddArray(vtkDataObject* mesh, const std::string &meshName, #if defined(SENSEI_DEBUG) // mark level id - vtkFloatArray *la = vtkFloatArray::New(); + svtkFloatArray *la = svtkFloatArray::New(); la->SetName("amrex_level_id"); la->SetNumberOfTuples(clen); la->Fill(i); @@ -989,7 +989,7 @@ int AmrDataAdaptor::AddArray(vtkDataObject* mesh, const std::string &meshName, la->Delete(); // mark mpi rank - vtkFloatArray *ra = vtkFloatArray::New(); + svtkFloatArray *ra = svtkFloatArray::New(); ra->SetName("amrex_mpi_rank"); ra->SetNumberOfTuples(clen); ra->Fill(rank); diff --git a/Src/Extern/SENSEI/AMReX_AmrInSituBridge.cpp b/Src/Extern/SENSEI/AMReX_AmrInSituBridge.cpp index 26f63d2a101..018669a4bfc 100644 --- a/Src/Extern/SENSEI/AMReX_AmrInSituBridge.cpp +++ b/Src/Extern/SENSEI/AMReX_AmrInSituBridge.cpp @@ -29,7 +29,7 @@ AmrInSituBridge::update(Amr *dataSource) data_adaptor->SetDataSource(dataSource); data_adaptor->SetDataTime(dataSource->cumTime()); data_adaptor->SetDataTimeStep(dataSource->levelSteps(0)); - ret = analysis_adaptor->Execute(data_adaptor) ? 0 : -1; + ret = analysis_adaptor->Execute(data_adaptor, nullptr) ? 0 : -1; data_adaptor->ReleaseData(); data_adaptor->Delete(); diff --git a/Src/Extern/SENSEI/AMReX_AmrMeshDataAdaptor.H b/Src/Extern/SENSEI/AMReX_AmrMeshDataAdaptor.H index 5a8a88552af..54277505bd4 100644 --- a/Src/Extern/SENSEI/AMReX_AmrMeshDataAdaptor.H +++ b/Src/Extern/SENSEI/AMReX_AmrMeshDataAdaptor.H @@ -34,10 +34,10 @@ public: int GetNumberOfArrays(const std::string &meshName, int association, unsigned int &numberOfArrays) override; int GetArrayName(const std::string &meshName, int association, unsigned int index, std::string &arrayName) override; #endif - int GetMesh(const std::string &meshName, bool structureOnly, vtkDataObject *&mesh) override; - int AddGhostNodesArray(vtkDataObject* mesh, const std::string &meshName) override; - int AddGhostCellsArray(vtkDataObject* mesh, const std::string &meshName) override; - int AddArray(vtkDataObject* mesh, const std::string &meshName, int association, const std::string &arrayName) override; + int GetMesh(const std::string &meshName, bool structureOnly, svtkDataObject *&mesh) override; + int AddGhostNodesArray(svtkDataObject* mesh, const std::string &meshName) override; + int AddGhostCellsArray(svtkDataObject* mesh, const std::string &meshName) override; + int AddArray(svtkDataObject* mesh, const std::string &meshName, int association, const std::string &arrayName) override; int ReleaseData() override; protected: diff --git a/Src/Extern/SENSEI/AMReX_AmrMeshDataAdaptor.cpp b/Src/Extern/SENSEI/AMReX_AmrMeshDataAdaptor.cpp index 2e4968cc8b2..34b92c1d25d 100644 --- a/Src/Extern/SENSEI/AMReX_AmrMeshDataAdaptor.cpp +++ b/Src/Extern/SENSEI/AMReX_AmrMeshDataAdaptor.cpp @@ -2,18 +2,18 @@ #include "Profiler.h" #include "Error.h" -#include "VTKUtils.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include "SVTKUtils.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include @@ -58,11 +58,11 @@ int MeshStateMap::Initialize( if (state.is_cell_centered()) { - this->Map[vtkDataObject::CELL][arrayName] = std::make_pair(i,j); + this->Map[svtkDataObject::CELL][arrayName] = 
std::make_pair(i,j); } else if (state.is_nodal()) { - this->Map[vtkDataObject::POINT][arrayName] = std::make_pair(i,j); + this->Map[svtkDataObject::POINT][arrayName] = std::make_pair(i,j); } } } @@ -83,7 +83,7 @@ struct AmrMeshDataAdaptor::InternalsType std::vector> Names; amrex::InSituUtils::MeshStateMap StateMetadata; #if SENSEI_VERSION_MAJOR < 3 - std::vector ManagedObjects; + std::vector ManagedObjects; #endif }; @@ -149,13 +149,13 @@ int AmrMeshDataAdaptor::GetMeshMetadata(unsigned int id, metadata->GlobalView = true; metadata->MeshName = "mesh"; - metadata->MeshType = VTK_OVERLAPPING_AMR; - metadata->BlockType = VTK_UNIFORM_GRID; + metadata->MeshType = SVTK_OVERLAPPING_AMR; + metadata->BlockType = SVTK_UNIFORM_GRID; metadata->NumBlocks = 0; metadata->NumCells = 0; metadata->NumPoints = 0; metadata->NumBlocksLocal = {-1}; - metadata->CoordinateType = InSituUtils::amrex_tt::vtk_type_enum(); + metadata->CoordinateType = InSituUtils::amrex_tt::svtk_type_enum(); metadata->StaticMesh = 0; // num levels @@ -224,7 +224,7 @@ int AmrMeshDataAdaptor::GetMeshMetadata(unsigned int id, {pdLo[0], pdHi[0], pdLo[1], pdHi[1], pdLo[2], pdHi[2]}); } - // global extent (note: VTK uses point centered indexing) + // global extent (note: SVTK uses point centered indexing) const amrex::Box& cdom = this->Internals->Mesh->Geom(0).Domain(); amrex::Box ndom = surroundingNodes(cdom); @@ -261,19 +261,19 @@ int AmrMeshDataAdaptor::GetMeshMetadata(unsigned int id, // scalar, vector, tensor metadata->ArrayComponents[j] = 1; // POD type - metadata->ArrayType[j] = InSituUtils::amrex_tt::vtk_type_enum(); + metadata->ArrayType[j] = InSituUtils::amrex_tt::svtk_type_enum(); // mesh centering if (state0.is_cell_centered()) { - metadata->ArrayCentering[j] = vtkDataObject::CELL; + metadata->ArrayCentering[j] = svtkDataObject::CELL; } else if (state0.is_nodal()) { - metadata->ArrayCentering[j] = vtkDataObject::POINT; + metadata->ArrayCentering[j] = svtkDataObject::POINT; } else { - metadata->ArrayCentering[j] = vtkDataObject::FIELD; + metadata->ArrayCentering[j] = svtkDataObject::FIELD; } } @@ -396,8 +396,8 @@ int AmrMeshDataAdaptor::GetNumberOfArrays(const std::string &meshName, return -1; } - if ((association != vtkDataObject::POINT) && - (association != vtkDataObject::CELL)) + if ((association != svtkDataObject::POINT) && + (association != svtkDataObject::CELL)) { SENSEI_ERROR("Invalid association " << association) return -1; @@ -427,7 +427,7 @@ int AmrMeshDataAdaptor::GetArrayName(const std::string &meshName, if (this->Internals->StateMetadata.GetName(association, index, arrayName)) { SENSEI_ERROR("No array named \"" << arrayName << "\" in " - << sensei::VTKUtils::GetAttributesName(association) + << sensei::SVTKUtils::GetAttributesName(association) << " data") return -1; } @@ -475,7 +475,7 @@ int AmrMeshDataAdaptor::GetMeshHasGhostCells(const std::string &meshName, int &n //----------------------------------------------------------------------------- int AmrMeshDataAdaptor::GetMesh(const std::string &meshName, - bool structureOnly, vtkDataObject *&mesh) + bool structureOnly, svtkDataObject *&mesh) { amrex::ignore_unused(structureOnly); @@ -498,8 +498,8 @@ int AmrMeshDataAdaptor::GetMesh(const std::string &meshName, int nLevels = this->Internals->Mesh->finestLevel() + 1; - // initialize new vtk datasets - vtkOverlappingAMR *amrMesh = vtkOverlappingAMR::New(); + // initialize new svtk datasets + svtkOverlappingAMR *amrMesh = svtkOverlappingAMR::New(); #if SENSEI_VERSION_MAJOR < 3 Internals->ManagedObjects.push_back(amrMesh); 
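
Both data adaptors assemble the same structure in GetMesh: an svtkOverlappingAMR container holding box metadata for every block globally, with an svtkUniformGrid allocated only for rank-local blocks. The sketch below condenses that assembly for a single level and a single block. It assumes SVTK (SENSEI's vendored fork of VTK, with classes renamed under an "s" prefix) mirrors the VTK API one-for-one; the header names, the Initialize() call, and the buildLevel0() helper are inferred for illustration and do not appear in this patch.

```c++
#include <svtkOverlappingAMR.h>  // assumed header names, mirroring VTK's
#include <svtkUniformGrid.h>
#include <svtkAMRBox.h>

// hypothetical helper: one level, one block, following the calls used above
svtkOverlappingAMR* buildLevel0(const double origin[3], const double spacing[3],
                               const int lo[3], const int hi[3])
{
    svtkOverlappingAMR* amr = svtkOverlappingAMR::New();
    int blocksPerLevel[1] = {1};
    amr->Initialize(1, blocksPerLevel);          // assumed from VTK's API

    svtkAMRBox block(lo, hi);                    // cell-centered box metadata
    amr->SetAMRBox(0, 0, block);
    amr->SetAMRBlockSourceIndex(0, 0, 0);

    svtkUniformGrid* ug = svtkUniformGrid::New(); // node-centered extents,
    ug->SetOrigin(origin);                       // hence the +1 below
    ug->SetSpacing(spacing);
    ug->SetExtent(lo[0], hi[0]+1, lo[1], hi[1]+1, lo[2], hi[2]+1);

    amr->SetDataSet(0, 0, ug);
    ug->Delete();   // the container now holds the only needed reference
    return amr;     // caller must Delete(); with SENSEI < 3 the adaptor
}                   // tracks it in ManagedObjects and frees it in ReleaseData
```

The New()/Delete() pairs are SVTK's manual reference counting, which is why the pre-SENSEI-3 code path pushes every created object onto ManagedObjects instead of either leaking it or freeing it while the analysis still holds a pointer.
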
#endif @@ -560,12 +560,12 @@ int AmrMeshDataAdaptor::GetMesh(const std::string &meshName, int cboxLo[3] = {AMREX_ARLIM(cbox.loVect())}; int cboxHi[3] = {AMREX_ARLIM(cbox.hiVect())}; - // vtk's representation of box metadata - vtkAMRBox block(cboxLo, cboxHi); + // svtk's representation of box metadata + svtkAMRBox block(cboxLo, cboxHi); amrMesh->SetAMRBox(i, j, block); amrMesh->SetAMRBlockSourceIndex(i, j, gid++); - // skip building a vtk amrMesh for the non local boxes + // skip building a svtk amrMesh for the non local boxes if (dmap[j] != rank) continue; @@ -580,14 +580,14 @@ int AmrMeshDataAdaptor::GetMesh(const std::string &meshName, int nboxLo[3] = {AMREX_ARLIM(nbox.loVect())}; int nboxHi[3] = {AMREX_ARLIM(nbox.hiVect())}; - // new vtk uniform amrMesh, node centered - vtkUniformGrid *ug = vtkUniformGrid::New(); + // new svtk uniform amrMesh, node centered + svtkUniformGrid *ug = svtkUniformGrid::New(); ug->SetOrigin(origin); ug->SetSpacing(spacing); ug->SetExtent(nboxLo[0], nboxHi[0], nboxLo[1], nboxHi[1], nboxLo[2], nboxHi[2]); - // pass the block into vtk + // pass the block into svtk amrMesh->SetDataSet(i, j, ug); ug->Delete(); } @@ -597,7 +597,7 @@ int AmrMeshDataAdaptor::GetMesh(const std::string &meshName, } //----------------------------------------------------------------------------- -int AmrMeshDataAdaptor::AddGhostNodesArray(vtkDataObject *mesh, +int AmrMeshDataAdaptor::AddGhostNodesArray(svtkDataObject *mesh, const std::string &meshName) { amrex::ignore_unused(mesh); @@ -613,7 +613,7 @@ int AmrMeshDataAdaptor::AddGhostNodesArray(vtkDataObject *mesh, } //----------------------------------------------------------------------------- -int AmrMeshDataAdaptor::AddGhostCellsArray(vtkDataObject* mesh, +int AmrMeshDataAdaptor::AddGhostCellsArray(svtkDataObject* mesh, const std::string &meshName) { if (meshName != "mesh") @@ -622,7 +622,7 @@ int AmrMeshDataAdaptor::AddGhostCellsArray(vtkDataObject* mesh, return -1; } - vtkOverlappingAMR *amrMesh = dynamic_cast(mesh); + svtkOverlappingAMR *amrMesh = dynamic_cast(mesh); if (!amrMesh) { SENSEI_ERROR("Invalid mesh type " @@ -701,7 +701,7 @@ int AmrMeshDataAdaptor::AddGhostCellsArray(vtkDataObject* mesh, if (dmap[j] != rank) continue; - vtkUniformGrid *blockMesh = amrMesh->GetDataSet(i, j); + svtkUniformGrid *blockMesh = amrMesh->GetDataSet(i, j); if (!blockMesh) { @@ -711,18 +711,18 @@ int AmrMeshDataAdaptor::AddGhostCellsArray(vtkDataObject* mesh, long nCells = blockMesh->GetNumberOfCells(); - // transfer mask array into vtk - vtkUnsignedCharArray *ga = vtkUnsignedCharArray::New(); - ga->SetName("vtkGhostType"); + // transfer mask array into svtk + svtkUnsignedCharArray *ga = svtkUnsignedCharArray::New(); + ga->SetName("svtkGhostType"); ga->SetArray(mask[j], nCells, 0); blockMesh->GetCellData()->AddArray(ga); ga->Delete(); // for debug can visualize the ghost cells // FIXME -- a bug in Catalyst ignores internal ghost zones - // when using the VTK writrer. Until that bug gets fixed, one + // when using the SVTK writer. 
Until that bug gets fixed, one // can manually inject this copy using a PV Python filter - ga = vtkUnsignedCharArray::New(); + ga = svtkUnsignedCharArray::New(); ga->SetName("GhostType"); ga->SetArray(mask[j], nCells, 1); blockMesh->GetCellData()->AddArray(ga); @@ -734,7 +734,7 @@ int AmrMeshDataAdaptor::AddGhostCellsArray(vtkDataObject* mesh, } //----------------------------------------------------------------------------- -int AmrMeshDataAdaptor::AddArray(vtkDataObject* mesh, +int AmrMeshDataAdaptor::AddArray(svtkDataObject* mesh, const std::string &meshName, int association, const std::string &arrayName) { @@ -747,7 +747,7 @@ int AmrMeshDataAdaptor::AddArray(vtkDataObject* mesh, return -1; } - vtkOverlappingAMR *amrMesh = dynamic_cast(mesh); + svtkOverlappingAMR *amrMesh = dynamic_cast(mesh); if (!amrMesh) { SENSEI_ERROR("Invalid mesh type " @@ -760,8 +760,8 @@ int AmrMeshDataAdaptor::AddArray(vtkDataObject* mesh, return -1; } - if ((association != vtkDataObject::CELL) && - (association != vtkDataObject::CELL)) + if ((association != svtkDataObject::CELL) && + (association != svtkDataObject::CELL)) { SENSEI_ERROR("Invalid association " << association) return -1; @@ -774,7 +774,7 @@ int AmrMeshDataAdaptor::AddArray(vtkDataObject* mesh, if (this->Internals->StateMetadata.GetIndex(arrayName, association, fab, comp)) { SENSEI_ERROR("Failed to locate descriptor for " - << sensei::VTKUtils::GetAttributesName(association) + << sensei::SVTKUtils::GetAttributesName(association) << " data array \"" << arrayName << "\"") return -1; } @@ -792,8 +792,8 @@ int AmrMeshDataAdaptor::AddArray(vtkDataObject* mesh, unsigned int ng = state.nGrow(); // check centering - if (!((association == vtkDataObject::CELL) && state.is_cell_centered()) && - !((association == vtkDataObject::POINT) && state.is_nodal())) + if (!((association == svtkDataObject::CELL) && state.is_cell_centered()) && + !((association == svtkDataObject::POINT) && state.is_nodal())) { SENSEI_ERROR("association does not match MultiFab centering") return -1; @@ -824,7 +824,7 @@ int AmrMeshDataAdaptor::AddArray(vtkDataObject* mesh, int cboxLo[3] = {AMREX_ARLIM(cbox.loVect())}; int cboxHi[3] = {AMREX_ARLIM(cbox.hiVect())}; - // skip building a vtk mesh for the non local boxes + // skip building a svtk mesh for the non local boxes if (dmap[j] != rank) continue; @@ -836,7 +836,7 @@ int AmrMeshDataAdaptor::AddArray(vtkDataObject* mesh, int nboxHi[3] = {AMREX_ARLIM(nbox.hiVect())}; // get the block mesh - vtkUniformGrid *ug = amrMesh->GetDataSet(i, j); + svtkUniformGrid *ug = amrMesh->GetDataSet(i, j); // node centered size long nlen = 1; @@ -851,9 +851,9 @@ int AmrMeshDataAdaptor::AddArray(vtkDataObject* mesh, // pointer to the data amrex_real *pcd = state[j].dataPtr(comp); - // allocate vtk array - InSituUtils::amrex_tt::vtk_type *da = - InSituUtils::amrex_tt::vtk_type::New(); + // allocate svtk array + InSituUtils::amrex_tt::svtk_type *da = + InSituUtils::amrex_tt::svtk_type::New(); // set component name da->SetName(arrayName.c_str()); @@ -879,7 +879,7 @@ int AmrMeshDataAdaptor::AddArray(vtkDataObject* mesh, #if defined(SENSEI_DEBUG) // mark level id - vtkFloatArray *la = vtkFloatArray::New(); + svtkFloatArray *la = svtkFloatArray::New(); la->SetName("amrex_level_id"); la->SetNumberOfTuples(clen); la->Fill(i); @@ -887,7 +887,7 @@ int AmrMeshDataAdaptor::AddArray(vtkDataObject* mesh, la->Delete(); // mark mpi rank - vtkFloatArray *ra = vtkFloatArray::New(); + svtkFloatArray *ra = svtkFloatArray::New(); ra->SetName("amrex_mpi_rank"); 
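
Two notes on the AmrMeshDataAdaptor hunks above. First, the association guard in AddArray tests svtkDataObject::CELL twice, on both sides of the rename; the AmrDataAdaptor::AddArray version earlier in this patch tests CELL and then POINT, which is presumably what was intended here as well. Second, the SENSEI_DEBUG block being renamed here tags every local block with constant cell arrays recording its AMR level and owning MPI rank, which makes the domain decomposition directly visible in ParaView. A minimal sketch of that tagging pattern follows; tagBlock and the header names are illustrative assumptions, and the svtkFloatArray/svtkCellData API is assumed to mirror VTK's.

```c++
#include <svtkUniformGrid.h>  // assumed header names
#include <svtkFloatArray.h>
#include <svtkCellData.h>

// hypothetical helper condensing the SENSEI_DEBUG pattern above
void tagBlock(svtkUniformGrid* ug, int level, int rank)
{
    long nCells = ug->GetNumberOfCells();

    svtkFloatArray* la = svtkFloatArray::New();
    la->SetName("amrex_level_id");
    la->SetNumberOfTuples(nCells);
    la->Fill(level);                   // one constant value per cell
    ug->GetCellData()->AddArray(la);
    la->Delete();

    svtkFloatArray* ra = svtkFloatArray::New();
    ra->SetName("amrex_mpi_rank");
    ra->SetNumberOfTuples(nCells);
    ra->Fill(rank);
    ug->GetCellData()->AddArray(ra);
    ra->Delete();
}
```
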
ra->SetNumberOfTuples(clen); ra->Fill(rank); diff --git a/Src/Extern/SENSEI/AMReX_AmrMeshInSituBridge.cpp b/Src/Extern/SENSEI/AMReX_AmrMeshInSituBridge.cpp index 55adb1b5c59..cd6b6794171 100644 --- a/Src/Extern/SENSEI/AMReX_AmrMeshInSituBridge.cpp +++ b/Src/Extern/SENSEI/AMReX_AmrMeshInSituBridge.cpp @@ -35,7 +35,7 @@ AmrMeshInSituBridge::update(unsigned int step, double time, data_adaptor->SetDataSource(mesh, states, names); data_adaptor->SetDataTime(time); data_adaptor->SetDataTimeStep(step); - ret = analysis_adaptor->Execute(data_adaptor) ? 0 : -1; + ret = analysis_adaptor->Execute(data_adaptor, nullptr) ? 0 : -1; data_adaptor->ReleaseData(); data_adaptor->Delete(); diff --git a/Src/Extern/SENSEI/AMReX_AmrMeshParticleDataAdaptor.H b/Src/Extern/SENSEI/AMReX_AmrMeshParticleDataAdaptor.H index 61e4d510745..fbd5227824f 100644 --- a/Src/Extern/SENSEI/AMReX_AmrMeshParticleDataAdaptor.H +++ b/Src/Extern/SENSEI/AMReX_AmrMeshParticleDataAdaptor.H @@ -45,10 +45,10 @@ public: int GetArrayName(const std::string &meshName, int association, unsigned int index, std::string &arrayName) override; #endif int GetNumberOfMeshes(unsigned int &numMeshes) override; - int GetMesh(const std::string &meshName, bool structureOnly, vtkDataObject *&mesh) override; - int AddGhostNodesArray(vtkDataObject* mesh, const std::string &meshName) override; - int AddGhostCellsArray(vtkDataObject* mesh, const std::string &meshName) override; - int AddArray(vtkDataObject* mesh, const std::string &meshName, int association, const std::string &arrayName) override; + int GetMesh(const std::string &meshName, bool structureOnly, svtkDataObject *&mesh) override; + int AddGhostNodesArray(svtkDataObject* mesh, const std::string &meshName) override; + int AddGhostCellsArray(svtkDataObject* mesh, const std::string &meshName) override; + int AddArray(svtkDataObject* mesh, const std::string &meshName, int association, const std::string &arrayName) override; int ReleaseData() override; protected: diff --git a/Src/Extern/SENSEI/AMReX_AmrMeshParticleDataAdaptorI.H b/Src/Extern/SENSEI/AMReX_AmrMeshParticleDataAdaptorI.H index a93357d5043..4cbb53203b6 100644 --- a/Src/Extern/SENSEI/AMReX_AmrMeshParticleDataAdaptorI.H +++ b/Src/Extern/SENSEI/AMReX_AmrMeshParticleDataAdaptorI.H @@ -148,7 +148,7 @@ template int AmrMeshParticleDataAdaptor::GetMesh( const std::string &meshName, bool structureOnly, - vtkDataObject *&mesh) + svtkDataObject *&mesh) { if(meshName == m_meshName) { @@ -164,7 +164,7 @@ int AmrMeshParticleDataAdaptor:: template int AmrMeshParticleDataAdaptor::AddGhostNodesArray( - vtkDataObject* mesh, + svtkDataObject* mesh, const std::string &meshName) { if(meshName == m_meshName) @@ -181,7 +181,7 @@ int AmrMeshParticleDataAdaptor:: template int AmrMeshParticleDataAdaptor::AddGhostCellsArray( - vtkDataObject* mesh, + svtkDataObject* mesh, const std::string &meshName) { if(meshName == m_meshName) @@ -198,7 +198,7 @@ int AmrMeshParticleDataAdaptor:: template int AmrMeshParticleDataAdaptor::AddArray( - vtkDataObject* mesh, + svtkDataObject* mesh, const std::string &meshName, int association, const std::string &arrayName) diff --git a/Src/Extern/SENSEI/AMReX_AmrMeshParticleInSituBridge.H b/Src/Extern/SENSEI/AMReX_AmrMeshParticleInSituBridge.H index bede5908cdc..9208c8a753b 100644 --- a/Src/Extern/SENSEI/AMReX_AmrMeshParticleInSituBridge.H +++ b/Src/Extern/SENSEI/AMReX_AmrMeshParticleInSituBridge.H @@ -86,7 +86,7 @@ int AmrMeshParticleInSituBridge::update( data_adaptor->SetDataTime(time); data_adaptor->SetDataTimeStep(step); - ret = 
analysis_adaptor->Execute(data_adaptor) ? 0 : -1; + ret = analysis_adaptor->Execute(data_adaptor, nullptr) ? 0 : -1; data_adaptor->ReleaseData(); data_adaptor->Delete(); diff --git a/Src/Extern/SENSEI/AMReX_AmrParticleDataAdaptor.H b/Src/Extern/SENSEI/AMReX_AmrParticleDataAdaptor.H index 886a7df6d18..3f7a945e019 100644 --- a/Src/Extern/SENSEI/AMReX_AmrParticleDataAdaptor.H +++ b/Src/Extern/SENSEI/AMReX_AmrParticleDataAdaptor.H @@ -43,10 +43,10 @@ public: int GetArrayName(const std::string &meshName, int association, unsigned int index, std::string &arrayName) override; #endif int GetNumberOfMeshes(unsigned int &numMeshes) override; - int GetMesh(const std::string &meshName, bool structureOnly, vtkDataObject *&mesh) override; - int AddGhostNodesArray(vtkDataObject* mesh, const std::string &meshName) override; - int AddGhostCellsArray(vtkDataObject* mesh, const std::string &meshName) override; - int AddArray(vtkDataObject* mesh, const std::string &meshName, int association, const std::string &arrayName) override; + int GetMesh(const std::string &meshName, bool structureOnly, svtkDataObject *&mesh) override; + int AddGhostNodesArray(svtkDataObject* mesh, const std::string &meshName) override; + int AddGhostCellsArray(svtkDataObject* mesh, const std::string &meshName) override; + int AddArray(svtkDataObject* mesh, const std::string &meshName, int association, const std::string &arrayName) override; int ReleaseData() override; protected: diff --git a/Src/Extern/SENSEI/AMReX_AmrParticleDataAdaptorI.H b/Src/Extern/SENSEI/AMReX_AmrParticleDataAdaptorI.H index 813466fc0f8..9035cd0c39c 100644 --- a/Src/Extern/SENSEI/AMReX_AmrParticleDataAdaptorI.H +++ b/Src/Extern/SENSEI/AMReX_AmrParticleDataAdaptorI.H @@ -146,7 +146,7 @@ template int AmrParticleDataAdaptor::GetMesh( const std::string &meshName, bool structureOnly, - vtkDataObject *&mesh) + svtkDataObject *&mesh) { if(meshName == m_meshName) { @@ -162,7 +162,7 @@ int AmrParticleDataAdaptor::GetM template int AmrParticleDataAdaptor::AddGhostNodesArray( - vtkDataObject* mesh, + svtkDataObject* mesh, const std::string &meshName) { if(meshName == m_meshName) @@ -179,7 +179,7 @@ int AmrParticleDataAdaptor::AddG template int AmrParticleDataAdaptor::AddGhostCellsArray( - vtkDataObject* mesh, + svtkDataObject* mesh, const std::string &meshName) { if(meshName == m_meshName) @@ -196,7 +196,7 @@ int AmrParticleDataAdaptor::AddG template int AmrParticleDataAdaptor::AddArray( - vtkDataObject* mesh, + svtkDataObject* mesh, const std::string &meshName, int association, const std::string &arrayName) diff --git a/Src/Extern/SENSEI/AMReX_InSituUtils.H b/Src/Extern/SENSEI/AMReX_InSituUtils.H index e7c212d7b4e..2799e21b367 100644 --- a/Src/Extern/SENSEI/AMReX_InSituUtils.H +++ b/Src/Extern/SENSEI/AMReX_InSituUtils.H @@ -2,10 +2,10 @@ #define AMReX_InSituUtils_H #include -#include -#include -#include -#include +#include +#include +#include +#include #include #include @@ -47,21 +47,21 @@ enum PointGhostTypes -// traits helper for mapping between amrex_real and vtkDataArray +// traits helper for mapping between amrex_real and svtkDataArray template struct amrex_tt {}; -#define amrex_tt_specialize(cpp_t, vtk_t, vtk_t_e) \ +#define amrex_tt_specialize(cpp_t, svtk_t, svtk_t_e) \ template <> \ struct amrex_tt \ { \ - using vtk_type = vtk_t; \ + using svtk_type = svtk_t; \ \ static \ - constexpr int vtk_type_enum() { return vtk_t_e; } \ + constexpr int svtk_type_enum() { return svtk_t_e; } \ }; -amrex_tt_specialize(float, vtkFloatArray, VTK_FLOAT) -amrex_tt_specialize(double, 
vtkDoubleArray, VTK_DOUBLE) +amrex_tt_specialize(float, svtkFloatArray, SVTK_FLOAT) +amrex_tt_specialize(double, svtkDoubleArray, SVTK_DOUBLE) // helpers to modify values diff --git a/Src/Extern/SENSEI/AMReX_InSituUtils.cpp b/Src/Extern/SENSEI/AMReX_InSituUtils.cpp index 64c429e8bb6..d13c8738aeb 100644 --- a/Src/Extern/SENSEI/AMReX_InSituUtils.cpp +++ b/Src/Extern/SENSEI/AMReX_InSituUtils.cpp @@ -1,7 +1,7 @@ #include "AMReX_InSituUtils.H" #include "Error.h" -#include "VTKUtils.h" +#include "SVTKUtils.h" namespace amrex { namespace InSituUtils { @@ -14,7 +14,7 @@ int StateMap::GetIndex(const std::string &name, int centering, if (cit == this->Map.end()) { - SENSEI_ERROR("No " << sensei::VTKUtils::GetAttributesName(centering) + SENSEI_ERROR("No " << sensei::SVTKUtils::GetAttributesName(centering) << " arrays") return -1; } @@ -23,7 +23,7 @@ int StateMap::GetIndex(const std::string &name, int centering, if (nit == cit->second.end()) { SENSEI_ERROR("No array named \"" << name << "\" in " - << sensei::VTKUtils::GetAttributesName(centering) + << sensei::SVTKUtils::GetAttributesName(centering) << " centered data") return -1; } @@ -41,7 +41,7 @@ int StateMap::GetName(int centering, int id, std::string &name) if (cit == this->Map.end()) { - SENSEI_ERROR("No " << sensei::VTKUtils::GetAttributesName(centering) + SENSEI_ERROR("No " << sensei::SVTKUtils::GetAttributesName(centering) << " arrays") return -1; } diff --git a/Src/Extern/SENSEI/AMReX_ParticleDataAdaptor.H b/Src/Extern/SENSEI/AMReX_ParticleDataAdaptor.H index 73ca142ec0b..f284b15831b 100644 --- a/Src/Extern/SENSEI/AMReX_ParticleDataAdaptor.H +++ b/Src/Extern/SENSEI/AMReX_ParticleDataAdaptor.H @@ -8,7 +8,7 @@ #include #include -class vtkPolyData; +class svtkPolyData; namespace amrex { @@ -40,22 +40,22 @@ public: void SetPinMesh(int val); // get particle id numbers - int AddParticlesIDArray(vtkDataObject* mesh); + int AddParticlesIDArray(svtkDataObject* mesh); // get particle cpu numbers (process each particle was generated on) - int AddParticlesCPUArray(vtkDataObject* mesh); + int AddParticlesCPUArray(svtkDataObject* mesh); // get particle integer arrays in Structs of Arrays format - int AddParticlesSOAIntArray(const std::string &arrayName, vtkDataObject* mesh); + int AddParticlesSOAIntArray(const std::string &arrayName, svtkDataObject* mesh); // get particle real arrays in Structs of Arrays format - int AddParticlesSOARealArray(const std::string &arrayName, vtkDataObject* mesh); + int AddParticlesSOARealArray(const std::string &arrayName, svtkDataObject* mesh); // get particle integer arrays in Array Of Structs format - int AddParticlesAOSIntArray(const std::string &arrayName, vtkDataObject* mesh); + int AddParticlesAOSIntArray(const std::string &arrayName, svtkDataObject* mesh); // get particle real arrays in Array Of Structs format - int AddParticlesAOSRealArray(const std::string &arrayName, vtkDataObject* mesh); + int AddParticlesAOSRealArray(const std::string &arrayName, svtkDataObject* mesh); // SENSEI API #if SENSEI_VERSION_MAJOR >= 3 @@ -68,10 +68,10 @@ public: int GetArrayName(const std::string &meshName, int association, unsigned int index, std::string &arrayName) override; #endif int GetNumberOfMeshes(unsigned int &numMeshes) override; - int GetMesh(const std::string &meshName, bool structureOnly, vtkDataObject *&mesh) override; - int AddGhostNodesArray(vtkDataObject* mesh, const std::string &meshName) override; - int AddGhostCellsArray(vtkDataObject* mesh, const std::string &meshName) override; - int AddArray(vtkDataObject* mesh, 
const std::string &meshName, int association, const std::string &arrayName) override; + int GetMesh(const std::string &meshName, bool structureOnly, svtkDataObject *&mesh) override; + int AddGhostNodesArray(svtkDataObject* mesh, const std::string &meshName) override; + int AddGhostCellsArray(svtkDataObject* mesh, const std::string &meshName) override; + int AddArray(svtkDataObject* mesh, const std::string &meshName, int association, const std::string &arrayName) override; int ReleaseData() override; protected: @@ -79,7 +79,7 @@ protected: ~ParticleDataAdaptor() = default; private: - vtkPolyData* BuildParticles(); + svtkPolyData* BuildParticles(); const std::string m_particlesName = "particles"; diff --git a/Src/Extern/SENSEI/AMReX_ParticleDataAdaptorI.H b/Src/Extern/SENSEI/AMReX_ParticleDataAdaptorI.H index 26174f83e1b..8a2d15562d3 100644 --- a/Src/Extern/SENSEI/AMReX_ParticleDataAdaptorI.H +++ b/Src/Extern/SENSEI/AMReX_ParticleDataAdaptorI.H @@ -1,13 +1,13 @@ #include "Profiler.h" #include "Error.h" -#include "VTKUtils.h" +#include "SVTKUtils.h" #include "MeshMetadata.h" -// vtk includes -#include -#include -#include -#include -#include +// svtk includes +#include +#include +#include +#include +#include @@ -194,7 +194,7 @@ int ParticleDataAdaptor::GetNumb unsigned int &numberOfArrays) { numberOfArrays = 0; - if(association == vtkDataObject::POINT) + if(association == svtkDataObject::POINT) { numberOfArrays = m_realStructs.size() + m_intStructs.size() @@ -213,7 +213,7 @@ int ParticleDataAdaptor::GetArra unsigned int index, std::string &arrayName) { - if(association == vtkDataObject::POINT) + if(association == svtkDataObject::POINT) { if(index < m_realStructs.size()) { @@ -253,7 +253,7 @@ template int ParticleDataAdaptor::GetMesh( const std::string &meshName, bool structureOnly, - vtkDataObject *&mesh) + svtkDataObject *&mesh) { mesh = nullptr; int nprocs = 1; @@ -266,7 +266,7 @@ int ParticleDataAdaptor::GetMesh SENSEI_ERROR("No mesh named \"" << meshName << "\"") return -1; } - vtkMultiBlockDataSet* mb = vtkMultiBlockDataSet::New(); + svtkMultiBlockDataSet* mb = svtkMultiBlockDataSet::New(); if (structureOnly) { @@ -275,7 +275,7 @@ int ParticleDataAdaptor::GetMesh } mb->SetNumberOfBlocks(nprocs); - vtkPolyData *pd = BuildParticles(); + svtkPolyData *pd = BuildParticles(); mb->SetBlock(rank, pd); pd->Delete(); mesh = mb; @@ -286,7 +286,7 @@ int ParticleDataAdaptor::GetMesh //----------------------------------------------------------------------------- template int ParticleDataAdaptor::AddGhostNodesArray( - vtkDataObject*, + svtkDataObject*, const std::string &meshName) { if (meshName != m_particlesName) @@ -300,7 +300,7 @@ int ParticleDataAdaptor::AddGhos //----------------------------------------------------------------------------- template int ParticleDataAdaptor::AddGhostCellsArray( - vtkDataObject*, + svtkDataObject*, const std::string &meshName) { if (meshName != m_particlesName) @@ -314,7 +314,7 @@ int ParticleDataAdaptor::AddGhos //----------------------------------------------------------------------------- template int ParticleDataAdaptor::AddArray( - vtkDataObject* mesh, + svtkDataObject* mesh, const std::string &meshName, int association, const std::string &arrayName) @@ -325,7 +325,7 @@ int ParticleDataAdaptor::AddArra return -1; } - if (association != vtkDataObject::POINT) + if (association != svtkDataObject::POINT) { SENSEI_ERROR("Invalid association " << association); return -1; @@ -393,10 +393,10 @@ int ParticleDataAdaptor::GetMesh metadata->MeshName = m_particlesName; // 
container mesh type (all) - metadata->MeshType = VTK_MULTIBLOCK_DATA_SET; + metadata->MeshType = SVTK_MULTIBLOCK_DATA_SET; // block mesh type (all) - metadata->BlockType = VTK_POLY_DATA; + metadata->BlockType = SVTK_POLY_DATA; // global number of blocks (all) metadata->NumBlocks = nprocs; @@ -412,9 +412,9 @@ int ParticleDataAdaptor::GetMesh // type enum of point data (unstructured, optional) #ifdef AMREX_SINGLE_PRECISION_PARTICLES - metadata->CoordinateType = VTK_FLOAT; + metadata->CoordinateType = SVTK_FLOAT; #else - metadata->CoordinateType = VTK_DOUBLE; + metadata->CoordinateType = SVTK_DOUBLE; #endif // total number of points in all blocks (all, optional) @@ -467,19 +467,19 @@ int ParticleDataAdaptor::GetMesh metadata->ArrayCentering = {}; for(auto s : m_realStructs) { - metadata->ArrayCentering.push_back(vtkDataObject::POINT); + metadata->ArrayCentering.push_back(svtkDataObject::POINT); } for(auto s : m_intStructs) { - metadata->ArrayCentering.push_back(vtkDataObject::POINT); + metadata->ArrayCentering.push_back(svtkDataObject::POINT); } for(auto s : m_realArrays) { - metadata->ArrayCentering.push_back(vtkDataObject::POINT); + metadata->ArrayCentering.push_back(svtkDataObject::POINT); } for(auto s : m_intArrays) { - metadata->ArrayCentering.push_back(vtkDataObject::POINT); + metadata->ArrayCentering.push_back(svtkDataObject::POINT); } // number of components of each array (all) @@ -506,26 +506,26 @@ int ParticleDataAdaptor::GetMesh for(auto s : m_realStructs) { #ifdef AMREX_SINGLE_PRECISION_PARTICLES - metadata->ArrayType.push_back(VTK_FLOAT); + metadata->ArrayType.push_back(SVTK_FLOAT); #else - metadata->ArrayType.push_back(VTK_DOUBLE); + metadata->ArrayType.push_back(SVTK_DOUBLE); #endif } for(auto s : m_intStructs) { - metadata->ArrayType.push_back(VTK_INT); + metadata->ArrayType.push_back(SVTK_INT); } for(auto s : m_realArrays) { #ifdef AMREX_SINGLE_PRECISION_PARTICLES - metadata->ArrayType.push_back(VTK_FLOAT); + metadata->ArrayType.push_back(SVTK_FLOAT); #else - metadata->ArrayType.push_back(VTK_DOUBLE); + metadata->ArrayType.push_back(SVTK_DOUBLE); #endif } for(auto s : m_intArrays) { - metadata->ArrayType.push_back(VTK_INT); + metadata->ArrayType.push_back(SVTK_INT); } // global min,max of each array (all, optional) @@ -646,19 +646,19 @@ int ParticleDataAdaptor::GetMesh //----------------------------------------------------------------------------- template -vtkPolyData* ParticleDataAdaptor::BuildParticles() +svtkPolyData* ParticleDataAdaptor::BuildParticles() { // return particle data pd - vtkPolyData* pd = vtkPolyData::New(); + svtkPolyData* pd = svtkPolyData::New(); const auto& particles = this->m_particles->GetParticles(); long long numParticles = this->m_particles->TotalNumberOfParticles(true, true); // allocate vertex storage for particles #ifdef AMREX_SINGLE_PRECISION_PARTICLES - vtkNew coords; + svtkNew coords; #else - vtkNew coords; + svtkNew coords; #endif coords->SetName("coords"); coords->SetNumberOfComponents(3); @@ -669,12 +669,12 @@ vtkPolyData* ParticleDataAdaptor double *pCoords = coords->GetPointer(0); #endif - // use this to index into the VTK array as we copy level by level and tile by + // use this to index into the SVTK array as we copy level by level and tile by // tile long long ptId = 0; // allocate connectivity array for particles - vtkNew vertex; + svtkNew vertex; vertex->AllocateExact(numParticles, 1); // points->SetNumberOfPoints(numParticles); @@ -717,8 +717,8 @@ vtkPolyData* ParticleDataAdaptor } } - // pass the particle coordinates into VTK's 
point data structure. - vtkNew points; + // pass the particle coordinates into SVTK's point data structure. + svtkNew points; points->SetData(coords); // add point and vertex data to output mesh @@ -731,14 +731,14 @@ vtkPolyData* ParticleDataAdaptor //----------------------------------------------------------------------------- template int ParticleDataAdaptor::AddParticlesIDArray( - vtkDataObject* mesh) + svtkDataObject* mesh) { - auto vtk_particles = dynamic_cast(mesh); + auto svtk_particles = dynamic_cast(mesh); const auto& particles = this->m_particles->GetParticles(); auto nptsOnProc = this->m_particles->TotalNumberOfParticles(true, true); - // allocate a VTK array for the data - vtkNew idArray; + // allocate a SVTK array for the data + svtkNew idArray; idArray->SetName("id"); idArray->SetNumberOfComponents(1); idArray->SetNumberOfValues(nptsOnProc); @@ -767,8 +767,8 @@ int ParticleDataAdaptor::AddPart } } - // the association for this array is vtkDataObject::POINT - vtk_particles->GetPointData()->AddArray(idArray); + // the association for this array is svtkDataObject::POINT + svtk_particles->GetPointData()->AddArray(idArray); return 0; } @@ -776,14 +776,14 @@ int ParticleDataAdaptor::AddPart //----------------------------------------------------------------------------- template int ParticleDataAdaptor::AddParticlesCPUArray( - vtkDataObject* mesh) + svtkDataObject* mesh) { - auto vtk_particles = dynamic_cast(mesh); + auto svtk_particles = dynamic_cast(mesh); const auto& particles = this->m_particles->GetParticles(); auto nptsOnProc = this->m_particles->TotalNumberOfParticles(true, true); - // allocate a VTK array for the data - vtkNew cpuArray; + // allocate a SVTK array for the data + svtkNew cpuArray; cpuArray->SetName("cpu"); cpuArray->SetNumberOfComponents(1); cpuArray->SetNumberOfValues(nptsOnProc); @@ -811,8 +811,8 @@ int ParticleDataAdaptor::AddPart } } - // the association for this array is vtkDataObject::POINT - vtk_particles->GetPointData()->AddArray(cpuArray); + // the association for this array is svtkDataObject::POINT + svtk_particles->GetPointData()->AddArray(cpuArray); return 0; } @@ -821,7 +821,7 @@ int ParticleDataAdaptor::AddPart template int ParticleDataAdaptor::AddParticlesSOARealArray( const std::string &arrayName, - vtkDataObject* mesh) + svtkDataObject* mesh) { const long nParticles = this->m_particles->TotalNumberOfParticles(true, true); @@ -847,11 +847,11 @@ int ParticleDataAdaptor::AddPart } } - // allocate the vtkArray + // allocate the svtkArray #ifdef AMREX_SINGLE_PRECISION_PARTICLES - vtkNew data; + svtkNew data; #else - vtkNew data; + svtkNew data; #endif data->SetName(arrayName.c_str()); data->SetNumberOfComponents(nComps); @@ -896,9 +896,9 @@ int ParticleDataAdaptor::AddPart int rank = 0; MPI_Comm_rank(this->GetCommunicator(), &rank); - auto blocks = dynamic_cast(mesh); + auto blocks = dynamic_cast(mesh); - auto block = dynamic_cast(blocks->GetBlock(rank)); + auto block = dynamic_cast(blocks->GetBlock(rank)); block->GetPointData()->AddArray(data); return 0; @@ -908,7 +908,7 @@ int ParticleDataAdaptor::AddPart template int ParticleDataAdaptor::AddParticlesSOAIntArray( const std::string &arrayName, - vtkDataObject* mesh) + svtkDataObject* mesh) { // get the particles from the particle container auto nptsOnProc = this->m_particles->TotalNumberOfParticles(true, true); @@ -931,7 +931,7 @@ int ParticleDataAdaptor::AddPart return -1; } - vtkNew data; + svtkNew data; data->SetName(arrayName.c_str()); data->SetNumberOfComponents(1); 
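
BuildParticles() lays the particles out as an svtkPolyData point cloud: a single 3-component coordinate array (float or double depending on AMREX_SINGLE_PRECISION_PARTICLES), one vertex cell per particle, and per-particle quantities attached afterwards as point-data arrays. A condensed sketch of that layout follows; the template arguments (svtkNew<svtkFloatArray> and friends), the header names, and the makeParticleCloud helper are assumptions inferred from context rather than text taken from the patch, and the zero-filled coordinate loop stands in for the level-by-level, tile-by-tile copy.

```c++
#include <svtkPolyData.h>   // assumed header names, mirroring VTK's
#include <svtkPoints.h>
#include <svtkFloatArray.h>
#include <svtkCellArray.h>
#include <svtkNew.h>

// hypothetical helper showing the BuildParticles() data layout
svtkPolyData* makeParticleCloud(long n)
{
    svtkNew<svtkFloatArray> coords;   // double with double-precision particles
    coords->SetName("coords");
    coords->SetNumberOfComponents(3);
    coords->SetNumberOfTuples(n);
    float* p = coords->GetPointer(0);
    for (long i = 0; i < 3*n; ++i) { p[i] = 0.f; } // stand-in for the real copy

    svtkNew<svtkCellArray> vertex;    // one vertex cell per particle
    vertex->AllocateExact(n, 1);
    for (long i = 0; i < n; ++i) {
        vertex->InsertNextCell(1);
        vertex->InsertCellPoint(i);
    }

    svtkNew<svtkPoints> points;       // wrap the coordinates without copying
    points->SetData(coords);

    svtkPolyData* pd = svtkPolyData::New();
    pd->SetPoints(points);
    pd->SetVerts(vertex);
    return pd;  // handed to the multiblock via SetBlock(rank, pd), then Delete()d
}
```
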
data->SetNumberOfValues(nptsOnProc); @@ -967,9 +967,9 @@ int ParticleDataAdaptor::AddPart int rank = 0; MPI_Comm_rank(this->GetCommunicator(), &rank); - auto blocks = dynamic_cast(mesh); + auto blocks = dynamic_cast(mesh); - auto block = dynamic_cast(blocks->GetBlock(rank)); + auto block = dynamic_cast(blocks->GetBlock(rank)); block->GetPointData()->AddArray(data); return 0; @@ -979,7 +979,7 @@ int ParticleDataAdaptor::AddPart template int ParticleDataAdaptor::AddParticlesAOSRealArray( const std::string &arrayName, - vtkDataObject* mesh) + svtkDataObject* mesh) { // get the particles from the particle container const auto& particles = this->m_particles->GetParticles(); @@ -1007,11 +1007,11 @@ int ParticleDataAdaptor::AddPart } } - // allocate the vtk array + // allocate the svtk array #ifdef AMREX_SINGLE_PRECISION_PARTICLES - vtkNew data; + svtkNew data; #else - vtkNew data; + svtkNew data; #endif data->SetName(arrayName.c_str()); @@ -1053,9 +1053,9 @@ int ParticleDataAdaptor::AddPart int rank = 0; MPI_Comm_rank(this->GetCommunicator(), &rank); - auto blocks = dynamic_cast(mesh); + auto blocks = dynamic_cast(mesh); - auto block = dynamic_cast(blocks->GetBlock(rank)); + auto block = dynamic_cast(blocks->GetBlock(rank)); block->GetPointData()->AddArray(data); return 0; @@ -1065,7 +1065,7 @@ int ParticleDataAdaptor::AddPart template int ParticleDataAdaptor::AddParticlesAOSIntArray( const std::string &arrayName, - vtkDataObject* mesh) + svtkDataObject* mesh) { // get the particles from the particle container const auto& particles = this->m_particles->GetParticles(); @@ -1090,8 +1090,8 @@ int ParticleDataAdaptor::AddPart return -1; } - // allocate vtkArray - vtkNew data; + // allocate svtkArray + svtkNew data; data->SetName(arrayName.c_str()); data->SetNumberOfComponents(1); data->SetNumberOfValues(nptsOnProc); @@ -1121,9 +1121,9 @@ int ParticleDataAdaptor::AddPart int rank = 0; MPI_Comm_rank(this->GetCommunicator(), &rank); - auto blocks = dynamic_cast(mesh); + auto blocks = dynamic_cast(mesh); - auto block = dynamic_cast(blocks->GetBlock(rank)); + auto block = dynamic_cast(blocks->GetBlock(rank)); block->GetPointData()->AddArray(data); diff --git a/Src/LinearSolvers/CMakeLists.txt b/Src/LinearSolvers/CMakeLists.txt index bbefab67999..63de2af0113 100644 --- a/Src/LinearSolvers/CMakeLists.txt +++ b/Src/LinearSolvers/CMakeLists.txt @@ -98,3 +98,15 @@ if (AMReX_HYPRE) MLMG/AMReX_MLNodeLaplacian_hypre.cpp ) endif () + +if (AMReX_SPACEDIM EQUAL 3) + + target_include_directories(amrex PUBLIC $) + + target_sources(amrex + PRIVATE + OpenBC/AMReX_OpenBC.H + OpenBC/AMReX_OpenBC_K.H + OpenBC/AMReX_OpenBC.cpp + ) +endif () diff --git a/Src/LinearSolvers/MLMG/AMReX_MLABecLaplacian.cpp b/Src/LinearSolvers/MLMG/AMReX_MLABecLaplacian.cpp index 89dbb268e10..e5a9b0b31af 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLABecLaplacian.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLABecLaplacian.cpp @@ -323,10 +323,10 @@ MLABecLaplacian::applyMetricTermsCoeffs () for (int alev = 0; alev < m_num_amr_levels; ++alev) { const int mglev = 0; - applyMetricTerm(alev, mglev, m_a_coeffs[alev][mglev]); + applyMetricTermToMF(alev, mglev, m_a_coeffs[alev][mglev]); for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { - applyMetricTerm(alev, mglev, m_b_coeffs[alev][mglev][idim]); + applyMetricTermToMF(alev, mglev, m_b_coeffs[alev][mglev][idim]); } } #endif diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H b/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H index 45464bbeb9c..a33d70b4771 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H 
+++ b/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H @@ -40,6 +40,11 @@ public: Real eps_rel, Real eps_abs); + int solve (Any& solnL, + const Any& rhsL, + Real eps_rel, + Real eps_abs); + void setVerbose (int _verbose) { verbose = _verbose; } int getVerbose () const { return verbose; } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.cpp b/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.cpp index c32b0d6199d..76144e6d42f 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.cpp @@ -78,6 +78,13 @@ MLCGSolver::solve (MultiFab& sol, } } +int +MLCGSolver::solve (Any& sol, const Any& rhs, Real eps_rel, Real eps_abs) +{ + AMREX_ASSERT(sol.is()); // xxxxx TODO: MLCGSolver Any + return solve(sol.get(), rhs.get(), eps_rel, eps_abs); +} + int MLCGSolver::solve_bicgstab (MultiFab& sol, const MultiFab& rhs, diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCellABecLap.H b/Src/LinearSolvers/MLMG/AMReX_MLCellABecLap.H index 985bc9855b4..0cc6456b7c8 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCellABecLap.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLCellABecLap.H @@ -59,9 +59,13 @@ public: virtual MultiFab const* getACoeffs (int amrlev, int mglev) const = 0; virtual Array getBCoeffs (int amrlev, int mglev) const = 0; - virtual void applyInhomogNeumannTerm (int amrlev, MultiFab& rhs) const final override; + virtual void applyInhomogNeumannTerm (int amrlev, Any& rhs) const final override; - virtual void applyOverset (int amlev, MultiFab& rhs) const override; + virtual void addInhomogNeumannFlux ( + int amrlev, const Array& grad, + MultiFab const& sol, bool mult_bcoef) const final override; + + virtual void applyOverset (int amlev, Any& rhs) const override; #if defined(AMREX_USE_HYPRE) && (AMREX_SPACEDIM > 1) virtual std::unique_ptr makeHypre (Hypre::Interface hypre_interface) const override; diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCellABecLap.cpp b/Src/LinearSolvers/MLMG/AMReX_MLCellABecLap.cpp index b5580b3c15c..db57162c21f 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCellABecLap.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLCellABecLap.cpp @@ -108,7 +108,7 @@ MLCellABecLap::define (const Vector& a_geom, amrlev = 0; for (int mglev = 1; mglev < m_num_mg_levels[amrlev]; ++mglev) { MultiFab foo(m_grids[amrlev][mglev], m_dmap[amrlev][mglev], 1, 0, MFInfo().SetAlloc(false)); - if (! isMFIterSafe(*m_overset_mask[amrlev][mglev], foo)) { + if (! 
amrex::isMFIterSafe(*m_overset_mask[amrlev][mglev], foo)) { auto osm = std::make_unique(m_grids[amrlev][mglev], m_dmap[amrlev][mglev], 1, 1); osm->ParallelCopy(*m_overset_mask[amrlev][mglev]); @@ -189,17 +189,21 @@ MLCellABecLap::getFluxes (const Vector >& a_flux a_flux[alev][idim]->mult(betainv); } } + addInhomogNeumannFlux(alev, a_flux[alev], *a_sol[alev], true); } } void -MLCellABecLap::applyInhomogNeumannTerm (int amrlev, MultiFab& rhs) const +MLCellABecLap::applyInhomogNeumannTerm (int amrlev, Any& a_rhs) const { bool has_inhomog_neumann = hasInhomogNeumannBC(); bool has_robin = hasRobinBC(); if (!has_inhomog_neumann && !has_robin) return; + AMREX_ASSERT(a_rhs.is()); + MultiFab& rhs = a_rhs.get(); + int ncomp = getNComp(); const int mglev = 0; @@ -414,9 +418,121 @@ MLCellABecLap::applyInhomogNeumannTerm (int amrlev, MultiFab& rhs) const } void -MLCellABecLap::applyOverset (int amrlev, MultiFab& rhs) const +MLCellABecLap::addInhomogNeumannFlux ( + int amrlev, const Array& grad, MultiFab const& sol, + bool mult_bcoef) const +{ + /* + * if (mult_bcoef == true) + * grad is -bceof*grad phi + * else + * grad is grad phi + */ + Real fac = mult_bcoef ? Real(-1.0) : Real(1.0); + + bool has_inhomog_neumann = hasInhomogNeumannBC(); + bool has_robin = hasRobinBC(); + + if (!has_inhomog_neumann && !has_robin) return; + + int ncomp = getNComp(); + const int mglev = 0; + + const auto dxinv = m_geom[amrlev][mglev].InvCellSize(); + const Box domain = m_geom[amrlev][mglev].growPeriodicDomain(1); + + Array bcoef = {AMREX_D_DECL(nullptr,nullptr,nullptr)}; + if (mult_bcoef) { + bcoef = getBCoeffs(amrlev,mglev); + } + + const auto& bndry = *m_bndry_sol[amrlev]; + + MFItInfo mfi_info; + if (Gpu::notInLaunchRegion()) mfi_info.SetDynamic(true); + +#ifdef AMREX_USE_OMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for (MFIter mfi(sol, mfi_info); mfi.isValid(); ++mfi) + { + Box const& vbx = mfi.validbox(); + for (OrientationIter orit; orit.isValid(); ++orit) { + const Orientation ori = orit(); + const int idim = ori.coordDir(); + const Box& ccb = amrex::adjCell(vbx, ori); + const Dim3 os = IntVect::TheDimensionVector(idim).dim3(); + const Real dxi = dxinv[idim]; + if (! domain.contains(ccb)) { + for (int icomp = 0; icomp < ncomp; ++icomp) { + auto const& phi = sol.const_array(mfi,icomp); + auto const bv = bndry.bndryValues(ori).multiFab().const_array(mfi,icomp); + auto const bc = bcoef[idim] ? bcoef[idim]->const_array(mfi,icomp) + : Array4{}; + auto const& f = grad[idim]->array(mfi,icomp); + if (ori.isLow()) { + if (m_lobc_orig[icomp][idim] == + LinOpBCType::inhomogNeumann) { + AMREX_HOST_DEVICE_FOR_3D(ccb, i, j, k, + { + int ii = i+os.x; + int jj = j+os.y; + int kk = k+os.z; + Real b = bc ? bc(ii,jj,kk) : Real(1.0); + f(ii,jj,kk) = fac*b*bv(i,j,k); + }); + } else if (m_lobc_orig[icomp][idim] == + LinOpBCType::Robin) { + Array4 const& rbc = (*m_robin_bcval[amrlev])[mfi].const_array(icomp*3); + AMREX_HOST_DEVICE_FOR_3D(ccb, i, j, k, + { + int ii = i+os.x; + int jj = j+os.y; + int kk = k+os.z; + Real tmp = Real(1.0) / + (rbc(i,j,k,1)*dxi + rbc(i,j,k,0)*Real(0.5)); + Real RA = rbc(i,j,k,2) * tmp; + Real RB = (rbc(i,j,k,1)*dxi - rbc(i,j,k,0)*Real(0.5)) * tmp; + Real b = bc ? bc(ii,jj,kk) : Real(1.0); + f(ii,jj,kk) = fac*b*dxi*((Real(1.0)-RB)*phi(ii,jj,kk)-RA); + }); + } + } else { + if (m_hibc_orig[icomp][idim] == + LinOpBCType::inhomogNeumann) { + AMREX_HOST_DEVICE_FOR_3D(ccb, i, j, k, + { + Real b = bc ? 
bc(i,j,k) : Real(1.0); + f(i,j,k) = fac*b*bv(i,j,k); + }); + } else if (m_hibc_orig[icomp][idim] == + LinOpBCType::Robin) { + Array4 const& rbc = (*m_robin_bcval[amrlev])[mfi].const_array(icomp*3); + AMREX_HOST_DEVICE_FOR_3D(ccb, i, j, k, + { + Real tmp = Real(1.0) / + (rbc(i,j,k,1)*dxi + rbc(i,j,k,0)*Real(0.5)); + Real RA = rbc(i,j,k,2) * tmp; + Real RB = (rbc(i,j,k,1)*dxi - rbc(i,j,k,0)*Real(0.5)) * tmp; + Real b = bc ? bc(i,j,k) : Real(1.0); + f(i,j,k) = fac*b*dxi*(RA+(RB-Real(1.0))* + phi(i-os.x,j-os.y,k-os.z)); + }); + } + } + } + } + } + } +} + + +void +MLCellABecLap::applyOverset (int amrlev, Any& a_rhs) const { if (m_overset_mask[amrlev][0]) { + AMREX_ASSERT(a_rhs.is()); + auto& rhs = a_rhs.get(); const int ncomp = getNComp(); #ifdef AMREX_USE_OMP #pragma omp parallel if (Gpu::notInLaunchRegion()) diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H b/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H index f1168e5c41e..9a6bb222113 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H @@ -3,6 +3,7 @@ #include #include +#include namespace amrex { @@ -109,6 +110,8 @@ public: virtual void interpolation (int amrlev, int fmglev, MultiFab& fine, const MultiFab& crse) const override; + virtual void interpAssign (int amrlev, int fmglev, MultiFab& fine, MultiFab& crse) const override; + virtual void averageDownSolutionRHS (int camrlev, MultiFab& crse_sol, MultiFab& crse_rhs, const MultiFab& fine_sol, const MultiFab& fine_rhs) override; @@ -132,9 +135,12 @@ public: virtual void compGrad (int amrlev, const Array& grad, MultiFab& sol, Location loc) const override; - virtual void applyMetricTerm (int amrlev, int mglev, MultiFab& rhs) const final override; + virtual void applyMetricTerm (int amrlev, int mglev, Any& rhs) const final override; virtual void unapplyMetricTerm (int amrlev, int mglev, MultiFab& rhs) const final override; - virtual void fillSolutionBC (int amrlev, MultiFab& sol, const MultiFab* crse_bcdata=nullptr) final override; + virtual Vector getSolvabilityOffset (int amrlev, int mglev, + Any const& rhs) const override; + virtual void fixSolvabilityByOffset (int amrlev, int mglev, Any& rhs, + Vector const& offset) const override; virtual void prepareForSolve () override; @@ -146,6 +152,23 @@ public: const Array& flux, const FArrayBox& sol, Location loc, const int face_only=0) const = 0; + // This could be turned into template if needed. 
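
The Robin branches above are worth unpacking. The boundary condition a·φ + b·∂φ/∂n = c is stored per component as the triple rbc = (a, b, c). Writing g for the ghost value and p for the first interior value, and taking n as the outward normal, the face-centered discretization a·(g + p)/2 + b·(g − p)·dxi = c solves to g = RA + RB·p with RA = c/(b·dxi + a/2) and RB = (b·dxi − a/2)/(b·dxi + a/2), which is exactly the tmp/RA/RB computation in both kernels. Expanding g shows that the low-side expression fac·b·dxi·((1 − RB)·p − RA) equals fac·b_coef·dxi·(p − g) and the high-side expression fac·b·dxi·(RA + (RB − 1)·p) equals fac·b_coef·dxi·(g − p): in both branches the result is fac·b_coef times the one-sided difference across the face taken in the positive coordinate direction. A scalar check of that identity (robinGhost and the sample numbers are illustrative only):

```c++
#include <cstdio>
#include <cmath>

// ghost value implied by the Robin condition a*phi + b*dphi/dn = c,
// discretized with ghost cell g and first interior cell p, spacing 1/dxi
double robinGhost(double a, double b, double c, double dxi, double p)
{
    double tmp = 1.0 / (b*dxi + 0.5*a);
    double RA = c * tmp;
    double RB = (b*dxi - 0.5*a) * tmp;
    return RA + RB*p;
}

int main()
{
    double a = 2.0, b = 0.5, c = 1.0, dxi = 16.0, p = 3.0;
    double g = robinGhost(a, b, c, dxi, p);

    // the low-side kernel expression ...
    double tmp = 1.0 / (b*dxi + 0.5*a);
    double kernel = dxi*((1.0 - (b*dxi - 0.5*a)*tmp)*p - c*tmp);

    // ... equals dxi*(p - g), the one-sided difference across the face
    std::printf("kernel = %.12g, dxi*(p - g) = %.12g\n", kernel, dxi*(p - g));
    return std::fabs(kernel - dxi*(p - g)) < 1e-12 ? 0 : 1;
}
```

The same identity explains the two call sites: getFluxes() passes mult_bcoef = true, so with fac = −1 the correction matches the −b∇φ convention of the fluxes, while compGrad() passes mult_bcoef = false, so with fac = 1 and b_coef = 1 the correction is the plain gradient at inhomogeneous Neumann and Robin faces.
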
+ void applyMetricTermToMF (int amrlev, int mglev, MultiFab& rhs) const; + + virtual Real AnyNormInfMask (int amrlev, Any const& a, bool local) const override; + + virtual void AnyAvgDownResAmr (int clev, Any& cres, Any const& fres) const override; + + virtual void AnyInterpolationAmr (int famrlev, Any& fine, const Any& crse, + IntVect const& /*nghost*/) const override; + + virtual void AnyAverageDownAndSync (Vector& sol) const override; + + virtual void addInhomogNeumannFlux (int /*amrlev*/, + const Array& /*grad*/, + MultiFab const& /*sol*/, + bool /*mult_bcoef*/) const {} + struct BCTL { BoundCond type; Real location; @@ -210,12 +233,17 @@ protected: // boundary cell flags for covered, not_covered, outside_domain Vector > > m_maskvals; + Vector > m_norm_fine_mask; + mutable Vector m_fluxreg; private: void defineAuxData (); void defineBC (); + + void computeVolInv () const; + mutable Vector > m_volinv; // used by solvability fix }; } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.cpp b/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.cpp index 8f6921950e7..5c8edcbb1a6 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #ifndef BL_NO_FORT @@ -9,6 +10,11 @@ namespace amrex { +#ifdef AMREX_SOFT_PERF_COUNTERS +// perf_counters +MLCellLinOp::Counters MLCellLinOp::perf_counters; +#endif + namespace { // Have to put it here due to CUDA extended lambda limitation struct ABCTag { @@ -97,6 +103,7 @@ MLCellLinOp::defineAuxData () m_undrrelxr.resize(m_num_amr_levels); m_maskvals.resize(m_num_amr_levels); m_fluxreg.resize(m_num_amr_levels-1); + m_norm_fine_mask.resize(m_num_amr_levels-1); const int ncomp = getNComp(); @@ -136,6 +143,9 @@ MLCellLinOp::defineAuxData () m_dmap[amrlev+1][0], m_dmap[amrlev][0], m_geom[amrlev+1][0], m_geom[amrlev][0], ratio, amrlev+1, ncomp); + m_norm_fine_mask[amrlev] = std::make_unique + (makeFineMask(m_grids[amrlev][0], m_dmap[amrlev][0], m_grids[amrlev+1][0], + ratio, 1, 0)); } #if (AMREX_SPACEDIM != 3) @@ -530,18 +540,6 @@ MLCellLinOp::solutionResidual (int amrlev, MultiFab& resid, MultiFab& x, const M MultiFab::Xpay(resid, Real(-1.0), b, 0, 0, ncomp, 0); } -void -MLCellLinOp::fillSolutionBC (int amrlev, MultiFab& sol, const MultiFab* crse_bcdata) -{ - BL_PROFILE("MLCellLinOp::fillSolutionBC()"); - if (crse_bcdata != nullptr) { - updateSolBC(amrlev, *crse_bcdata); - } - const int mglev = 0; - applyBC(amrlev, mglev, sol, BCMode::Inhomogeneous, StateMode::Solution, - m_bndry_sol[amrlev].get()); -} - void MLCellLinOp::correctionResidual (int amrlev, int mglev, MultiFab& resid, MultiFab& x, const MultiFab& b, BCMode bc_mode, const MultiFab* crse_bcdata) @@ -940,6 +938,8 @@ MLCellLinOp::compGrad (int amrlev, const Array& grad, }); #endif } + + addInhomogNeumannFlux(amrlev, grad, sol, false); } void @@ -1316,7 +1316,20 @@ MLCellLinOp::BndryCondLoc::setLOBndryConds (const Geometry& geom, const Real* dx } void -MLCellLinOp::applyMetricTerm (int amrlev, int mglev, MultiFab& rhs) const +MLCellLinOp::applyMetricTerm (int amrlev, int mglev, Any& rhs) const +{ + amrex::ignore_unused(amrlev,mglev,rhs); +#if (AMREX_SPACEDIM != 3) + + if (!m_has_metric_term) return; + + AMREX_ASSERT(rhs.is()); + applyMetricTermToMF(amrlev, mglev, rhs.get()); +#endif +} + +void +MLCellLinOp::applyMetricTermToMF (int amrlev, int mglev, MultiFab& rhs) const { amrex::ignore_unused(amrlev,mglev,rhs); #if (AMREX_SPACEDIM != 3) @@ -1435,9 +1448,417 @@ MLCellLinOp::update () if 
(MLLinOp::needsUpdate()) MLLinOp::update(); } -#ifdef AMREX_SOFT_PERF_COUNTERS -// perf_counters -MLCellLinOp::Counters MLCellLinOp::perf_counters; +void +MLCellLinOp::computeVolInv () const +{ + if (!m_volinv.empty()) return; + + m_volinv.resize(m_num_amr_levels); + for (int amrlev = 0; amrlev < m_num_amr_levels; ++amrlev) { + m_volinv[amrlev].resize(NMGLevels(amrlev)); + } + + // We don't need to compute for every level + + auto f = [&] (int amrlev, int mglev) { +#ifdef AMREX_USE_EB + auto factory = dynamic_cast(Factory(amrlev,mglev)); + if (factory) + { + const MultiFab& vfrac = factory->getVolFrac(); + m_volinv[amrlev][mglev] = vfrac.sum(0,true); + } + else +#endif + { + m_volinv[amrlev][mglev] + = Real(1.0 / compactify(Geom(amrlev,mglev).Domain()).d_numPts()); + } + }; + + // amrlev = 0, mglev = 0 + f(0,0); + + int mgbottom = NMGLevels(0)-1; + f(0,mgbottom); + +#ifdef AMREX_USE_EB + Real temp1, temp2; + auto factory = dynamic_cast(Factory(0,0)); + if (factory) + { + ParallelAllReduce::Sum({m_volinv[0][0], m_volinv[0][mgbottom]}, + ParallelContext::CommunicatorSub()); + temp1 = Real(1.0)/m_volinv[0][0]; + temp2 = Real(1.0)/m_volinv[0][mgbottom]; + } + else + { + temp1 = m_volinv[0][0]; + temp2 = m_volinv[0][mgbottom]; + } + m_volinv[0][0] = temp1; + m_volinv[0][mgbottom] = temp2; +#endif +} + +Vector +MLCellLinOp::getSolvabilityOffset (int amrlev, int mglev, Any const& a_rhs) const +{ + AMREX_ASSERT(a_rhs.is()); + auto const& rhs = a_rhs.get(); + + computeVolInv(); + + const int ncomp = getNComp(); + Vector offset(ncomp); + +#ifdef AMREX_USE_EB + auto factory = dynamic_cast(Factory(amrlev,mglev)); + if (factory) + { + const MultiFab& vfrac = factory->getVolFrac(); + for (int c = 0; c < ncomp; ++c) { + offset[c] = MultiFab::Dot(rhs, c, vfrac, 0, 1, 0, true) * m_volinv[amrlev][mglev]; + } + } + else +#endif + { + for (int c = 0; c < ncomp; ++c) { + offset[c] = rhs.sum(c,true) * m_volinv[amrlev][mglev]; + } + } + + ParallelAllReduce::Sum(offset.data(), ncomp, ParallelContext::CommunicatorSub()); + + return offset; +} + +Real +MLCellLinOp::AnyNormInfMask (int amrlev, Any const& a, bool local) const +{ + AMREX_ASSERT(a.is()); + auto& mf = a.get(); + + const int finest_level = NAMRLevels() - 1; + Real norm = 0._rt; +#ifdef AMREX_USE_EB + const int ncomp = getNComp(); + if (! 
mf.isAllRegular()) { + auto factory = dynamic_cast(Factory(amrlev)); + const MultiFab& vfrac = factory->getVolFrac(); + if (amrlev == finest_level) { +#ifdef AMREX_USE_GPU + if (Gpu::inLaunchRegion()) { + auto const& ma = mf.const_arrays(); + auto const& vfrac_ma = vfrac.const_arrays(); + norm = ParReduce(TypeList{}, TypeList{}, + mf, IntVect(0), ncomp, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) + -> GpuTuple + { + return amrex::Math::abs(ma[box_no](i,j,k,n) + *vfrac_ma[box_no](i,j,k)); + }); + } else +#endif + { +#ifdef AMREX_USE_OMP +#pragma omp parallel reduction(max:norm) +#endif + for (MFIter mfi(mf,true); mfi.isValid(); ++mfi) { + Box const& bx = mfi.tilebox(); + auto const& fab = mf.const_array(mfi); + auto const& v = vfrac.const_array(mfi); + AMREX_LOOP_4D(bx, ncomp, i, j, k, n, + { + norm = std::max(norm, amrex::Math::abs(fab(i,j,k,n)*v(i,j,k))); + }); + } + } + } else { +#ifdef AMREX_USE_GPU + if (Gpu::inLaunchRegion()) { + auto const& ma = mf.const_arrays(); + auto const& mask_ma = m_norm_fine_mask[amrlev]->const_arrays(); + auto const& vfrac_ma = vfrac.const_arrays(); + norm = ParReduce(TypeList{}, TypeList{}, + mf, IntVect(0), ncomp, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) + -> GpuTuple + { + if (mask_ma[box_no](i,j,k)) { + return amrex::Math::abs(ma[box_no](i,j,k,n) + *vfrac_ma[box_no](i,j,k)); + } else { + return Real(0.0); + } + }); + } else +#endif + { +#ifdef AMREX_USE_OMP +#pragma omp parallel reduction(max:norm) +#endif + for (MFIter mfi(mf,true); mfi.isValid(); ++mfi) { + Box const& bx = mfi.tilebox(); + auto const& fab = mf.const_array(mfi); + auto const& mask = m_norm_fine_mask[amrlev]->const_array(mfi); + auto const& v = vfrac.const_array(mfi); + AMREX_LOOP_4D(bx, ncomp, i, j, k, n, + { + if (mask(i,j,k)) { + norm = std::max(norm, amrex::Math::abs(fab(i,j,k,n)*v(i,j,k))); + } + }); + } + } + } + } else +#endif + { + iMultiFab const* fine_mask = (amrlev == finest_level) + ? nullptr : m_norm_fine_mask[amrlev].get(); + norm = MFNormInf(mf, fine_mask, true); + } + + if (!local) ParallelAllReduce::Max(norm, ParallelContext::CommunicatorSub()); + return norm; +} + +void +MLCellLinOp::AnyAvgDownResAmr (int clev, Any& cres, Any const& fres) const +{ + AMREX_ASSERT(cres.is() && fres.is()); +#ifdef AMREX_USE_EB + amrex::EB_average_down +#else + amrex::average_down +#endif + (fres.get(), cres.get(), 0, getNComp(), AMRRefRatio(clev)); +} + +void +MLCellLinOp::AnyInterpolationAmr (int famrlev, Any& a_fine, const Any& a_crse, + IntVect const& /*nghost*/) const +{ + AMREX_ASSERT(a_fine.is()); + MultiFab& fine = a_fine.get(); + MultiFab const& crse = a_crse.get(); + + const int ncomp = getNComp(); + const int refratio = AMRRefRatio(famrlev-1); + +#ifdef AMREX_USE_EB + auto factory = dynamic_cast(Factory(famrlev)); + const FabArray* flags = (factory) ? 
&(factory->getMultiEBCellFlagFab()) : nullptr; +#endif + + MFItInfo mfi_info; + if (Gpu::notInLaunchRegion()) mfi_info.EnableTiling().SetDynamic(true); +#ifdef AMREX_USE_OMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for (MFIter mfi(fine, mfi_info); mfi.isValid(); ++mfi) + { + const Box& bx = mfi.tilebox(); + Array4 const& ff = fine.array(mfi); + Array4 const& cc = crse.const_array(mfi); +#ifdef AMREX_USE_EB + bool call_lincc; + if (factory) + { + const auto& flag = (*flags)[mfi]; + if (flag.getType(amrex::grow(bx,1)) == FabType::regular) { + call_lincc = true; + } else { + Array4 const& flg = flag.const_array(); + switch(refratio) { + case 2: + { + AMREX_LAUNCH_HOST_DEVICE_LAMBDA (bx, tbx, + { + mlmg_eb_cc_interp_r<2>(tbx, ff, cc, flg, ncomp); + }); + break; + } + case 4: + { + AMREX_LAUNCH_HOST_DEVICE_LAMBDA (bx, tbx, + { + mlmg_eb_cc_interp_r<4>(tbx, ff, cc, flg, ncomp); + }); + break; + } + default: + amrex::Abort("mlmg_eb_cc_interp: only refratio 2 and 4 are supported"); + } + + call_lincc = false; + } + } + else + { + call_lincc = true; + } +#else + const bool call_lincc = true; +#endif + if (call_lincc) + { + switch(refratio) { + case 2: + { + AMREX_LAUNCH_HOST_DEVICE_LAMBDA (bx, tbx, + { + mlmg_lin_cc_interp_r2(tbx, ff, cc, ncomp); + }); + break; + } + case 4: + { + AMREX_LAUNCH_HOST_DEVICE_LAMBDA (bx, tbx, + { + mlmg_lin_cc_interp_r4(tbx, ff, cc, ncomp); + }); + break; + } + default: + amrex::Abort("mlmg_lin_cc_interp: only refratio 2 and 4 are supported"); + } + } + } +} + +void +MLCellLinOp::interpAssign (int amrlev, int fmglev, MultiFab& fine, MultiFab& crse) const +{ + const int ncomp = getNComp(); + + const Geometry& crse_geom = Geom(amrlev,fmglev+1); + const IntVect refratio = (amrlev > 0) ? IntVect(2) : mg_coarsen_ratio_vec[fmglev]; + const IntVect ng = crse.nGrowVect(); + + MultiFab cfine; + const MultiFab* cmf; + + if (amrex::isMFIterSafe(crse, fine)) + { + crse.FillBoundary(crse_geom.periodicity()); + cmf = &crse; + } + else + { + BoxArray cba = fine.boxArray(); + cba.coarsen(refratio); + cfine.define(cba, fine.DistributionMap(), ncomp, ng); + cfine.setVal(0.0); + cfine.ParallelCopy(crse, 0, 0, ncomp, IntVect(0), ng, crse_geom.periodicity()); + cmf = & cfine; + } + + bool isEB = fine.hasEBFabFactory(); + ignore_unused(isEB); + +#ifdef AMREX_USE_EB + auto factory = dynamic_cast(&(fine.Factory())); + const FabArray* flags = (factory) ? 
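// The interpolation above instantiates the kernel once per supported
// refinement ratio at compile time and selects at run time, aborting for
// anything other than 2 or 4. Bare-bones shape of that dispatch (kernel body
// elided):

template <int R>
void interp_kernel ()
{
    // R is a compile-time constant, so ratio-dependent indexing unrolls.
}

void interp_dispatch (int refratio)
{
    switch (refratio) {
    case 2:  interp_kernel<2>(); break;
    case 4:  interp_kernel<4>(); break;
    default: break; // the real code calls amrex::Abort here
    }
}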
&(factory->getMultiEBCellFlagFab()) : nullptr; +#endif + + MFItInfo mfi_info; + if (Gpu::notInLaunchRegion()) mfi_info.EnableTiling().SetDynamic(true); +#ifdef AMREX_USE_OMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for (MFIter mfi(fine, mfi_info); mfi.isValid(); ++mfi) + { + const Box& bx = mfi.tilebox(); + const auto& ff = fine.array(mfi); + const auto& cc = cmf->array(mfi); +#ifdef AMREX_USE_EB + bool call_lincc; + if (isEB) + { + const auto& flag = (*flags)[mfi]; + if (flag.getType(amrex::grow(bx,1)) == FabType::regular) { + call_lincc = true; + } else { + Array4 const& flg = flag.const_array(); + AMREX_LAUNCH_HOST_DEVICE_LAMBDA (bx, tbx, + { + mlmg_eb_cc_interp_r<2>(tbx, ff, cc, flg, ncomp); + }); + + call_lincc = false; + } + } + else + { + call_lincc = true; + } +#else + const bool call_lincc = true; +#endif + if (call_lincc) + { +#if (AMREX_SPACEDIM == 3) + if (hasHiddenDimension()) { + Box const& bx_2d = compactify(bx); + auto const& ff_2d = compactify(ff); + auto const& cc_2d = compactify(cc); + AMREX_LAUNCH_HOST_DEVICE_LAMBDA (bx_2d, tbx, + { + TwoD::mlmg_lin_cc_interp_r2(tbx, ff_2d, cc_2d, ncomp); + }); + } else #endif + { + AMREX_LAUNCH_HOST_DEVICE_LAMBDA (bx, tbx, + { + mlmg_lin_cc_interp_r2(tbx, ff, cc, ncomp); + }); + } + } + } +} + +void +MLCellLinOp::AnyAverageDownAndSync (Vector& sol) const +{ + AMREX_ASSERT(sol[0].is()); + + int ncomp = getNComp(); + for (int falev = NAMRLevels()-1; falev > 0; --falev) + { +#ifdef AMREX_USE_EB + amrex::EB_average_down(sol[falev ].get(), + sol[falev-1].get(), 0, ncomp, AMRRefRatio(falev-1)); +#else + amrex::average_down(sol[falev ].get(), + sol[falev-1].get(), 0, ncomp, AMRRefRatio(falev-1)); +#endif + } +} + +void +MLCellLinOp::fixSolvabilityByOffset (int amrlev, int mglev, Any& a_rhs, + Vector const& offset) const +{ + amrex::ignore_unused(amrlev, mglev); + AMREX_ASSERT(a_rhs.is()); + auto& rhs = a_rhs.get(); + + const int ncomp = getNComp(); + for (int c = 0; c < ncomp; ++c) { + rhs.plus(-offset[c], c, 1); + } +#ifdef AMREX_USE_EB + if (rhs.hasEBFabFactory()) { + Vector val(ncomp, 0.0_rt); + amrex::EB_set_covered(rhs, 0, ncomp, val); + } +#endif +} } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap.cpp b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap.cpp index a006976dc08..c8bea8dd2d2 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap.cpp @@ -34,7 +34,8 @@ MLEBABecLap::MLEBABecLap (const Vector& a_geom, std::unique_ptr > MLEBABecLap::makeFactory (int amrlev, int mglev) const { - return makeEBFabFactory(m_geom[amrlev][mglev], + return makeEBFabFactory(static_cast(Factory(0,0))->getEBIndexSpace(), + m_geom[amrlev][mglev], m_grids[amrlev][mglev], m_dmap[amrlev][mglev], {1,1,1}, EBSupport::full); diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLap_2D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLap_2D_K.H index 1b490726405..08439f9f99b 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLap_2D_K.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLap_2D_K.H @@ -200,7 +200,7 @@ AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlebndfdlap_adotx_rz_eb_doit (int i, int j, int k, Array4 const& y, Array4 const& x, Array4 const& dmsk, Array4 const& ecx, Array4 const& ecy, - F && xeb, Real dr, Real dz, Real rlo) noexcept + F && xeb, Real sigr, Real dr, Real dz, Real rlo) noexcept { if (dmsk(i,j,k)) { y(i,j,k) = Real(0.0); @@ -211,11 +211,11 @@ void mlebndfdlap_adotx_rz_eb_doit (int i, int j, int k, Array4 const& y, Real const r = rlo + Real(i) * dr; if (r == Real(0.0)) { if 
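// Where the r == 0 branch below comes from: the radial part of the operator
// is (1/r) d/dr (sigr * r * dphi/dr). On the axis, symmetry forces
// dphi/dr = 0, and the term limits (L'Hopital) to 2*sigr*d2phi/dr2. With the
// symmetric ghost value phi(-1) = phi(1), the centered second difference is
// 2*(phi(1) - phi(0))/dr^2, giving
//     4 * sigr * (x(i+1,j,k) - x(i,j,k)) / (dr*dr)
// exactly as coded. Sanity check with phi = r^2 (so d2phi/dr2 = 2): the
// discrete formula yields 4*sigr*(dr^2 - 0)/dr^2 = 4*sigr = 2*sigr*2.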
(ecx(i,j,k) == Real(1.0)) { // regular - out = Real(4.0) * (x(i+1,j,k)-x(i,j,k)) / (dr*dr); + out = Real(4.0) * sigr * (x(i+1,j,k)-x(i,j,k)) / (dr*dr); scale = Real(1.0); } else { hp = Real(1.0) + Real(2.) * ecx(i,j,k); - out = Real(4.0) * (xeb(i+1,j,k)-x(i,j,k)) / (dr*dr*hp*hp); + out = Real(4.0) * sigr * (xeb(i+1,j,k)-x(i,j,k)) / (dr*dr*hp*hp); scale = hp; } } else { @@ -235,7 +235,7 @@ void mlebndfdlap_adotx_rz_eb_doit (int i, int j, int k, Array4 const& y, tmp += (xeb(i-1,j,k) - x(i,j,k)) / hm * (r - Real(0.5) * hp * dr); } - out = tmp * Real(2.0) / ((hp+hm) * r * dr * dr); + out = tmp * Real(2.0) * sigr / ((hp+hm) * r * dr * dr); scale = amrex::min(hm, hp); } @@ -266,29 +266,29 @@ AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlebndfdlap_adotx_rz_eb (int i, int j, int k, Array4 const& y, Array4 const& x, Array4 const& dmsk, Array4 const& ecx, Array4 const& ecy, - Real xeb, Real dr, Real dz, Real rlo) noexcept + Real xeb, Real sigr, Real dr, Real dz, Real rlo) noexcept { mlebndfdlap_adotx_rz_eb_doit(i, j, k, y, x, dmsk, ecx, ecy, [=] (int, int, int) -> Real { return xeb; }, - dr, dz, rlo); + sigr, dr, dz, rlo); } AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlebndfdlap_adotx_rz_eb (int i, int j, int k, Array4 const& y, Array4 const& x, Array4 const& dmsk, Array4 const& ecx, Array4 const& ecy, - Array4 const& xeb, Real dr, Real dz, Real rlo) noexcept + Array4 const& xeb, Real sigr, Real dr, Real dz, Real rlo) noexcept { mlebndfdlap_adotx_rz_eb_doit(i, j, k, y, x, dmsk, ecx, ecy, [=] (int i1, int i2, int i3) -> Real { return xeb(i1,i2,i3); }, - dr, dz, rlo); + sigr, dr, dz, rlo); } AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlebndfdlap_adotx_rz (int i, int j, int k, Array4 const& y, Array4 const& x, Array4 const& dmsk, - Real dr, Real dz, Real rlo) noexcept + Real sigr, Real dr, Real dz, Real rlo) noexcept { if (dmsk(i,j,k)) { y(i,j,k) = Real(0.0); @@ -296,11 +296,11 @@ void mlebndfdlap_adotx_rz (int i, int j, int k, Array4 const& y, Real Ax = (x(i,j-1,k) - Real(2.0)*x(i,j,k) + x(i,j+1,k)) / (dz*dz); Real const r = rlo + Real(i)*dr; if (r == Real(0.0)) { - Ax += Real(4.0) * (x(i+1,j,k)-x(i,j,k)) / (dr*dr); + Ax += Real(4.0) * sigr * (x(i+1,j,k)-x(i,j,k)) / (dr*dr); } else { Real const rp = r + Real(0.5)*dr; Real const rm = r - Real(0.5)*dr; - Ax += (rp*x(i+1,j,k) - (rp+rm)*x(i,j,k) + rm*x(i-1,j,k)) / (r*dr*dr); + Ax += sigr * (rp*x(i+1,j,k) - (rp+rm)*x(i,j,k) + rm*x(i-1,j,k)) / (r*dr*dr); } y(i,j,k) = Ax; } @@ -310,7 +310,7 @@ AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlebndfdlap_gsrb_rz_eb (int i, int j, int k, Array4 const& x, Array4 const& rhs, Array4 const& dmsk, Array4 const& ecx, Array4 const& ecy, - Real dr, Real dz, Real rlo, int redblack) noexcept + Real sigr, Real dr, Real dz, Real rlo, int redblack) noexcept { if ((i+j+k+redblack)%2 == 0) { if (dmsk(i,j,k)) { @@ -322,12 +322,12 @@ void mlebndfdlap_gsrb_rz_eb (int i, int j, int k, Array4 const& x, Real const r = rlo + Real(i) * dr; if (r == Real(0.0)) { if (ecx(i,j,k) == Real(1.0)) { // regular - Ax = (Real(4.0) / (dr*dr)) * (x(i+1,j,k)-x(i,j,k)); - gamma = -(Real(4.0) / (dr*dr)); + Ax = (Real(4.0) * sigr / (dr*dr)) * (x(i+1,j,k)-x(i,j,k)); + gamma = -(Real(4.0) * sigr / (dr*dr)); scale = Real(1.0); } else { hp = Real(1.0) + Real(2.) 
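// The smoother being modified here is weighted red-black Gauss-Seidel: gamma
// is the operator's diagonal entry at (i,j,k), Ax is the stencil applied to
// the current iterate, and cells of one parity per sweep take the
// over-relaxed update x += omega*(rhs - Ax)/gamma with omega = 1.25.
// 1D sketch of the same pattern for d2/dx2 (illustrative, not AMReX code):

void gsrb_1d (double* x, double const* rhs, int n, int redblack, double dxi2)
{
    for (int i = 1; i < n-1; ++i) {
        if ((i + redblack) % 2 == 0) {
            double gamma = -2.0 * dxi2;                      // diagonal entry
            double Ax = (x[i-1] - 2.0*x[i] + x[i+1]) * dxi2; // full stencil
            constexpr double omega = 1.25;                   // over-relaxation
            x[i] += (rhs[i] - Ax) * (omega / gamma);
        }
    }
}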
* ecx(i,j,k); - gamma = -(Real(4.0) / (dr*dr*hp*hp)); + gamma = -(Real(4.0) * sigr / (dr*dr*hp*hp)); Ax = gamma * x(i,j,k); scale = hp; } @@ -352,8 +352,8 @@ void mlebndfdlap_gsrb_rz_eb (int i, int j, int k, Array4 const& x, tmp0 += Real(-1.0) / hm * (r - Real(0.5) * hp * dr); } - Ax = tmp * Real(2.0) / ((hp+hm) * r * dr * dr); - gamma = tmp0 * Real(2.0) / ((hp+hm) * r * dr * dr); + Ax = tmp * Real(2.0) * sigr / ((hp+hm) * r * dr * dr); + gamma = tmp0 * Real(2.0) * sigr / ((hp+hm) * r * dr * dr); scale = amrex::min(hm, hp); } @@ -390,7 +390,7 @@ void mlebndfdlap_gsrb_rz_eb (int i, int j, int k, Array4 const& x, AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlebndfdlap_gsrb_rz (int i, int j, int k, Array4 const& x, Array4 const& rhs, Array4 const& dmsk, - Real dr, Real dz, Real rlo, int redblack) noexcept + Real sigr, Real dr, Real dz, Real rlo, int redblack) noexcept { if ((i+j+k+redblack)%2 == 0) { if (dmsk(i,j,k)) { @@ -400,13 +400,13 @@ void mlebndfdlap_gsrb_rz (int i, int j, int k, Array4 const& x, Real gamma = -Real(2.0) / (dz*dz); Real const r = rlo + Real(i)*dr; if (r == Real(0.0)) { - Ax += (Real(4.0)/(dr*dr)) * (x(i+1,j,k)-x(i,j,k)); - gamma += -(Real(4.0)/(dr*dr)); + Ax += (Real(4.0)*sigr/(dr*dr)) * (x(i+1,j,k)-x(i,j,k)); + gamma += -(Real(4.0)*sigr/(dr*dr)); } else { Real const rp = r + Real(0.5)*dr; Real const rm = r - Real(0.5)*dr; - Ax += (rp*x(i+1,j,k) - (rp+rm)*x(i,j,k) + rm*x(i-1,j,k)) / (r*dr*dr); - gamma += -(rp+rm) / (r*dr*dr); + Ax += sigr*(rp*x(i+1,j,k) - (rp+rm)*x(i,j,k) + rm*x(i-1,j,k)) / (r*dr*dr); + gamma += -sigr*(rp+rm) / (r*dr*dr); } constexpr Real omega = Real(1.25); x(i,j,k) += (rhs(i,j,k) - Ax) * (omega / gamma); diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.H b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.H index 1215eda1f6c..404aefc8c0b 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.H @@ -19,8 +19,8 @@ namespace amrex { // with only diagonal components. The EB is assumed to be Dirichlet. // // del dot (sigma grad phi) - alpha/r^2 phi = rhs, for RZ where alpha is a -// scalar constant that is zero by default. sigma is non-zero in -// z-direction only. For now the `alpha` term has not been implemented yet. +// scalar constant that is zero by default. 
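// The operator documented in this header, written out: in RZ coordinates
//     del . (sigma grad phi) - (alpha / r^2) phi = rhs
// expands to
//     (1/r) d/dr ( sigma_r r dphi/dr ) + d/dz ( sigma_z dphi/dz )
//         - (alpha / r^2) phi = rhs,
// which is the form the mlebndfdlap_*_rz kernels above discretize; the sigr
// factor threaded through this patch is the radial coefficient sigma_r.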
For now the `alpha` term has +// not been implemented yet class MLEBNodeFDLaplacian : public MLNodeLinOp @@ -72,7 +72,7 @@ public: virtual std::unique_ptr > makeFactory (int amrlev, int mglev) const final override; - virtual void scaleRHS (int amrlev, MultiFab& rhs) const final; + virtual void scaleRHS (int amrlev, Any& rhs) const final; #endif @@ -100,6 +100,7 @@ public: virtual void fixUpResidualMask (int amrlev, iMultiFab& resmsk) final override; virtual bool isSingular (int) const final override { return false; } + virtual bool isBottomSingular () const final override { return false; } virtual void compGrad (int amrlev, const Array& grad, MultiFab& sol, Location /*loc*/) const override; @@ -118,8 +119,10 @@ public: Array4 const& bfab) const override; #endif + virtual void postSolve (Vector& sol) const override; + private: - GpuArray m_sigma{AMREX_D_DECL(1_rt,1_rt,1_rt)}; + GpuArray m_sigma{{AMREX_D_DECL(1_rt,1_rt,1_rt)}}; Real m_s_phi_eb = std::numeric_limits::lowest(); Vector m_phi_eb; int m_rz = false; diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp index cfa7595b515..920e8540200 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp @@ -310,16 +310,20 @@ MLEBNodeFDLaplacian::prepareForSolve () AMREX_ALWAYS_ASSERT_WITH_MESSAGE(m_lobc[0][0] == BCType::Neumann, "The lo-x BC must be Neumann for 2d RZ"); } - AMREX_ALWAYS_ASSERT_WITH_MESSAGE(m_sigma[0] == 0._rt, - "r-direction sigma must be zero"); + if (m_sigma[0] == 0._rt) { + m_sigma[0] = 1._rt; // For backward compatibility + } } #endif } #ifdef AMREX_USE_EB void -MLEBNodeFDLaplacian::scaleRHS (int amrlev, MultiFab& rhs) const +MLEBNodeFDLaplacian::scaleRHS (int amrlev, Any& a_rhs) const { + AMREX_ASSERT(a_rhs.is()); + auto& rhs = a_rhs.get(); + auto const& dmask = *m_dirichlet_mask[amrlev][0]; auto factory = dynamic_cast(m_factory[amrlev][0].get()); auto const& edgecent = factory->getEdgeCent(); @@ -353,6 +357,7 @@ MLEBNodeFDLaplacian::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFa const auto dxinv = m_geom[amrlev][mglev].InvCellSizeArray(); #if (AMREX_SPACEDIM == 2) + const auto sig0 = m_sigma[0]; const auto dx0 = m_geom[amrlev][mglev].CellSize(0); const auto dx1 = m_geom[amrlev][mglev].CellSize(1)/std::sqrt(m_sigma[1]); const auto xlo = m_geom[amrlev][mglev].ProbLo(0); @@ -393,7 +398,7 @@ MLEBNodeFDLaplacian::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFa AMREX_HOST_DEVICE_FOR_3D(box, i, j, k, { mlebndfdlap_adotx_rz_eb(i,j,k,yarr,xarr,dmarr,ecx,ecy, - phiebarr, dx0, dx1, xlo); + phiebarr, sig0, dx0, dx1, xlo); }); } else #endif @@ -410,7 +415,7 @@ MLEBNodeFDLaplacian::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFa AMREX_HOST_DEVICE_FOR_3D(box, i, j, k, { mlebndfdlap_adotx_rz_eb(i,j,k,yarr,xarr,dmarr,ecx,ecy, - phieb, dx0, dx1, xlo); + phieb, sig0, dx0, dx1, xlo); }); } else #endif @@ -429,7 +434,7 @@ MLEBNodeFDLaplacian::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFa if (m_rz) { AMREX_HOST_DEVICE_FOR_3D(box, i, j, k, { - mlebndfdlap_adotx_rz(i,j,k,yarr,xarr,dmarr,dx0,dx1,xlo); + mlebndfdlap_adotx_rz(i,j,k,yarr,xarr,dmarr,sig0,dx0,dx1,xlo); }); } else #endif @@ -450,6 +455,7 @@ MLEBNodeFDLaplacian::Fsmooth (int amrlev, int mglev, MultiFab& sol, const MultiF const auto dxinv = m_geom[amrlev][mglev].InvCellSizeArray(); #if (AMREX_SPACEDIM == 2) + const auto sig0 = m_sigma[0]; const auto dx0 = m_geom[amrlev][mglev].CellSize(0); const auto dx1 = 
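// scaleRHS now takes the type-erased Any instead of a concrete MultiFab,
// following the assert-then-unwrap pattern visible in the call sites above.
// A stand-in sketch of that pattern using std::any (amrex::Any plays a
// similar role in the solver); MultiFabStandIn and scale_rhs are
// illustrative names only:

#include <any>
#include <cassert>
#include <typeinfo>

struct MultiFabStandIn { /* field data */ };

void scale_rhs (std::any& a_rhs)
{
    assert(a_rhs.type() == typeid(MultiFabStandIn));     // like a_rhs.is<...>()
    auto& rhs = *std::any_cast<MultiFabStandIn>(&a_rhs); // like a_rhs.get<...>()
    // ... scale rhs in place ...
    (void) rhs;
}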
m_geom[amrlev][mglev].CellSize(1)/std::sqrt(m_sigma[1]); const auto xlo = m_geom[amrlev][mglev].ProbLo(0); @@ -492,7 +498,7 @@ MLEBNodeFDLaplacian::Fsmooth (int amrlev, int mglev, MultiFab& sol, const MultiF AMREX_HOST_DEVICE_FOR_3D(box, i, j, k, { mlebndfdlap_gsrb_rz_eb(i,j,k,solarr,rhsarr,dmskarr,ecx,ecy, - dx0, dx1, xlo, redblack); + sig0, dx0, dx1, xlo, redblack); }); } else #endif @@ -511,7 +517,7 @@ MLEBNodeFDLaplacian::Fsmooth (int amrlev, int mglev, MultiFab& sol, const MultiF AMREX_HOST_DEVICE_FOR_3D(box, i, j, k, { mlebndfdlap_gsrb_rz(i,j,k,solarr,rhsarr,dmskarr, - dx0, dx1, xlo, redblack); + sig0, dx0, dx1, xlo, redblack); }); } else #endif @@ -634,22 +640,57 @@ MLEBNodeFDLaplacian::compGrad (int amrlev, const Array #if defined(AMREX_USE_HYPRE) && (AMREX_SPACEDIM > 1) void -MLEBNodeFDLaplacian::fillIJMatrix (MFIter const& mfi, - Array4 const& gid, - Array4 const& lid, - HypreNodeLap::Int* const ncols, - HypreNodeLap::Int* const cols, - Real* const mat) const +MLEBNodeFDLaplacian::fillIJMatrix (MFIter const& /*mfi*/, + Array4 const& /*gid*/, + Array4 const& /*lid*/, + HypreNodeLap::Int* const /*ncols*/, + HypreNodeLap::Int* const /*cols*/, + Real* const /*mat*/) const { amrex::Abort("MLEBNodeFDLaplacian::fillIJMatrix: todo"); } void -MLEBNodeFDLaplacian::fillRHS (MFIter const& mfi, Array4 const& lid, - Real* const rhs, Array4 const& bfab) const +MLEBNodeFDLaplacian::fillRHS (MFIter const& /*mfi*/, Array4 const& /*lid*/, + Real* const /*rhs*/, Array4 const& /*bfab*/) const { amrex::Abort("MLEBNodeFDLaplacian::fillRHS: todo"); } #endif +void +MLEBNodeFDLaplacian::postSolve (Vector& sol) const +{ +#ifdef AMREX_USE_EB + for (int amrlev = 0; amrlev < m_num_amr_levels; ++amrlev) { + const auto phieb = m_s_phi_eb; + auto factory = dynamic_cast(m_factory[amrlev][0].get()); + auto const& levset_mf = factory->getLevelSet(); + auto const& levset_ar = levset_mf.const_arrays(); + MultiFab& mf = sol[amrlev].get(); + auto const& sol_ar = mf.arrays(); + if (phieb == std::numeric_limits::lowest()) { + auto const& phieb_ar = m_phi_eb[amrlev].const_arrays(); + amrex::ParallelFor(mf, IntVect(1), + [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k) noexcept + { + if (levset_ar[bi](i,j,k) >= Real(0.0)) { + sol_ar[bi](i,j,k) = phieb_ar[bi](i,j,k); + } + }); + } else { + amrex::ParallelFor(mf, IntVect(1), + [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k) noexcept + { + if (levset_ar[bi](i,j,k) >= Real(0.0)) { + sol_ar[bi](i,j,k) = phieb; + } + }); + } + } +#else + amrex::ignore_unused(sol); +#endif +} + } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp.H b/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp.H index a522d5aa927..1ed29a84801 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp.H @@ -105,7 +105,8 @@ public: // for cuda void applyBCTensor (int amrlev, int mglev, MultiFab& vel, BCMode bc_mode, StateMode s_mode, const MLMGBndry* bndry) const; - void compCrossTerms(int amrlev, int mglev, MultiFab const& mf) const; + void compCrossTerms(int amrlev, int mglev, MultiFab const& mf, + const MLMGBndry* bndry) const; }; } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp.cpp b/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp.cpp index 247e0fb292e..87bb78da730 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp.cpp @@ -226,7 +226,7 @@ MLEBTensorOp::apply (int amrlev, int mglev, MultiFab& out, MultiFab& in, BCMode MultiFab const& kapebmf = m_eb_kappa[amrlev][mglev]; Real bscalar = m_b_scalar; - 
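// postSolve above stamps the EB Dirichlet value back into nodes inside the
// body, identified by a non-negative signed level set, so the returned
// solution is well defined everywhere rather than only in the fluid region.
// Flat-array sketch of the idea (illustrative names):

void post_solve (double* sol, double const* levset, long n, double phi_eb)
{
    for (long i = 0; i < n; ++i) {
        if (levset[i] >= 0.0) {  // on or inside the embedded boundary
            sol[i] = phi_eb;     // or the spatially varying phi_eb field
        }
    }
}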
compCrossTerms(amrlev, mglev, in); + compCrossTerms(amrlev, mglev, in, bndry); MFItInfo mfi_info; if (Gpu::notInLaunchRegion()) mfi_info.EnableTiling().SetDynamic(true); @@ -289,15 +289,23 @@ MLEBTensorOp::apply (int amrlev, int mglev, MultiFab& out, MultiFab& in, BCMode } void -MLEBTensorOp::compCrossTerms(int amrlev, int mglev, MultiFab const& mf) const +MLEBTensorOp::compCrossTerms(int amrlev, int mglev, MultiFab const& mf, + const MLMGBndry* bndry) const { auto factory = dynamic_cast(m_factory[amrlev][mglev].get()); const FabArray* flags = (factory) ? &(factory->getMultiEBCellFlagFab()) : nullptr; auto area = (factory) ? factory->getAreaFrac() : Array{AMREX_D_DECL(nullptr,nullptr,nullptr)}; + const auto& bcondloc = *m_bcondloc[amrlev][mglev]; + + Array4 foo; + const Geometry& geom = m_geom[amrlev][mglev]; const auto dxinv = geom.InvCellSizeArray(); + const Box& domain = geom.growPeriodicDomain(1); + const auto dlo = amrex::lbound(domain); + const auto dhi = amrex::ubound(domain); Array const& etamf = m_b_coeffs[amrlev][mglev]; Array const& kapmf = m_kappa[amrlev][mglev]; @@ -346,56 +354,143 @@ MLEBTensorOp::compCrossTerms(int amrlev, int mglev, MultiFab const& mf) const } ); } else { - AMREX_D_TERM(Array4 const fxfab = fluxmf[0].array(mfi);, - Array4 const fyfab = fluxmf[1].array(mfi);, - Array4 const fzfab = fluxmf[2].array(mfi);); - Array4 const vfab = mf.const_array(mfi); - AMREX_D_TERM(Array4 const etaxfab = etamf[0].const_array(mfi);, - Array4 const etayfab = etamf[1].const_array(mfi);, - Array4 const etazfab = etamf[2].const_array(mfi);); - AMREX_D_TERM(Array4 const kapxfab = kapmf[0].const_array(mfi);, - Array4 const kapyfab = kapmf[1].const_array(mfi);, - Array4 const kapzfab = kapmf[2].const_array(mfi);); - - if (fabtyp == FabType::regular) - { - AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM - ( xbx, txbx, - { - mltensor_cross_terms_fx(txbx,fxfab,vfab,etaxfab,kapxfab,dxinv); - } - , ybx, tybx, - { - mltensor_cross_terms_fy(tybx,fyfab,vfab,etayfab,kapyfab,dxinv); - } - , zbx, tzbx, - { - mltensor_cross_terms_fz(tzbx,fzfab,vfab,etazfab,kapzfab,dxinv); + AMREX_D_TERM(Array4 const fxfab = fluxmf[0].array(mfi);, + Array4 const fyfab = fluxmf[1].array(mfi);, + Array4 const fzfab = fluxmf[2].array(mfi);); + Array4 const vfab = mf.const_array(mfi); + AMREX_D_TERM(Array4 const etaxfab = etamf[0].const_array(mfi);, + Array4 const etayfab = etamf[1].const_array(mfi);, + Array4 const etazfab = etamf[2].const_array(mfi);); + AMREX_D_TERM(Array4 const kapxfab = kapmf[0].const_array(mfi);, + Array4 const kapyfab = kapmf[1].const_array(mfi);, + Array4 const kapzfab = kapmf[2].const_array(mfi);); + + if (fabtyp == FabType::regular) + { + if (domain.strictly_contains(bx)) { + AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM + ( xbx, txbx, + { + mltensor_cross_terms_fx(txbx,fxfab,vfab,etaxfab,kapxfab,dxinv); + } + , ybx, tybx, + { + mltensor_cross_terms_fy(tybx,fyfab,vfab,etayfab,kapyfab,dxinv); + } + , zbx, tzbx, + { + mltensor_cross_terms_fz(tzbx,fzfab,vfab,etazfab,kapzfab,dxinv); + } + ); + } else { + const auto & bdcv = bcondloc.bndryConds(mfi); + + Array2D bct; + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + for (OrientationIter face; face; ++face) { + Orientation ori = face(); + bct(ori,icomp) = bdcv[icomp][ori]; + } + } + + const auto& bvxlo = (bndry != nullptr) ? + (*bndry)[Orientation(0,Orientation::low )].array(mfi) : foo; + const auto& bvylo = (bndry != nullptr) ? + (*bndry)[Orientation(1,Orientation::low )].array(mfi) : foo; + const auto& bvxhi = (bndry != nullptr) ? 
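// Two things to note in the compCrossTerms rework that follows: interior
// tiles (domain.strictly_contains(bx)) keep the original cheap kernels, and
// boundary tiles first snapshot the boundary-condition type of every
// (face, component) pair into a dense Array2D so the kernels can branch with
// O(1) lookups. Reduced sketch of that table build, with plain enums
// standing in for BoundCond and Orientation:

enum class BC { Dirichlet, Neumann, ReflectOdd };
constexpr int NFACES = 6; // 2*AMREX_SPACEDIM in 3D

struct BCTable { BC t[NFACES][3]; };

BCTable gather_bc (BC const src[NFACES][3])
{
    BCTable bct{};
    for (int comp = 0; comp < 3; ++comp) {
        for (int face = 0; face < NFACES; ++face) { // mirrors OrientationIter
            bct.t[face][comp] = src[face][comp];
        }
    }
    return bct;
}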
+ (*bndry)[Orientation(0,Orientation::high)].array(mfi) : foo; + const auto& bvyhi = (bndry != nullptr) ? + (*bndry)[Orientation(1,Orientation::high)].array(mfi) : foo; +#if (AMREX_SPACEDIM == 3) + const auto& bvzlo = (bndry != nullptr) ? + (*bndry)[Orientation(2,Orientation::low )].array(mfi) : foo; + const auto& bvzhi = (bndry != nullptr) ? + (*bndry)[Orientation(2,Orientation::high)].array(mfi) : foo; +#endif + + AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM + ( xbx, txbx, + { + mltensor_cross_terms_fx(txbx,fxfab,vfab,etaxfab,kapxfab,dxinv, + bvxlo, bvxhi, bct, dlo, dhi); + } + , ybx, tybx, + { + mltensor_cross_terms_fy(tybx,fyfab,vfab,etayfab,kapyfab,dxinv, + bvylo, bvyhi, bct, dlo, dhi); + } + , zbx, tzbx, + { + mltensor_cross_terms_fz(tzbx,fzfab,vfab,etazfab,kapzfab,dxinv, + bvzlo, bvzhi, bct, dlo, dhi); + } + ); } - ); - } - else - { - AMREX_D_TERM(Array4 const& apx = area[0]->const_array(mfi);, - Array4 const& apy = area[1]->const_array(mfi);, - Array4 const& apz = area[2]->const_array(mfi);); - Array4 const& flag = flags->const_array(mfi); + } + else + { + AMREX_D_TERM(Array4 const& apx = area[0]->const_array(mfi);, + Array4 const& apy = area[1]->const_array(mfi);, + Array4 const& apz = area[2]->const_array(mfi);); + Array4 const& flag = flags->const_array(mfi); + + if (domain.strictly_contains(bx)) { + AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM + ( xbx, txbx, + { + mlebtensor_cross_terms_fx(txbx,fxfab,vfab,etaxfab,kapxfab,apx,flag,dxinv); + } + , ybx, tybx, + { + mlebtensor_cross_terms_fy(tybx,fyfab,vfab,etayfab,kapyfab,apy,flag,dxinv); + } + , zbx, tzbx, + { + mlebtensor_cross_terms_fz(tzbx,fzfab,vfab,etazfab,kapzfab,apz,flag,dxinv); + } + ); + } else { + const auto & bdcv = bcondloc.bndryConds(mfi); + + Array2D bct; + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + for (OrientationIter face; face; ++face) { + Orientation ori = face(); + bct(ori,icomp) = bdcv[icomp][ori]; + } + } + + const auto& bvxlo = (bndry != nullptr) ? + (*bndry)[Orientation(0,Orientation::low )].array(mfi) : foo; + const auto& bvylo = (bndry != nullptr) ? + (*bndry)[Orientation(1,Orientation::low )].array(mfi) : foo; + const auto& bvxhi = (bndry != nullptr) ? + (*bndry)[Orientation(0,Orientation::high)].array(mfi) : foo; + const auto& bvyhi = (bndry != nullptr) ? + (*bndry)[Orientation(1,Orientation::high)].array(mfi) : foo; +#if (AMREX_SPACEDIM == 3) + const auto& bvzlo = (bndry != nullptr) ? + (*bndry)[Orientation(2,Orientation::low )].array(mfi) : foo; + const auto& bvzhi = (bndry != nullptr) ? 
+ (*bndry)[Orientation(2,Orientation::high)].array(mfi) : foo; +#endif - AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM - ( xbx, txbx, - { - mlebtensor_cross_terms_fx(txbx,fxfab,vfab,etaxfab,kapxfab,apx,flag,dxinv); - } - , ybx, tybx, - { - mlebtensor_cross_terms_fy(tybx,fyfab,vfab,etayfab,kapyfab,apy,flag,dxinv); - } - , zbx, tzbx, - { - mlebtensor_cross_terms_fz(tzbx,fzfab,vfab,etazfab,kapzfab,apz,flag,dxinv); - } - ); - } + AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM + ( xbx, txbx, + { + mlebtensor_cross_terms_fx(txbx,fxfab,vfab,etaxfab,kapxfab,apx,flag,dxinv, bvxlo, bvxhi, bct, dlo, dhi); + } + , ybx, tybx, + { + mlebtensor_cross_terms_fy(tybx,fyfab,vfab,etayfab,kapyfab,apy,flag,dxinv, bvylo, bvyhi, bct, dlo, dhi); + } + , zbx, tzbx, + { + mlebtensor_cross_terms_fz(tzbx,fzfab,vfab,etazfab,kapzfab,apz,flag,dxinv, bvzlo, bvzhi, bct, dlo, dhi); + } + ); + } + } } } @@ -411,7 +506,7 @@ MLEBTensorOp::compFlux (int amrlev, const Array& fluxe BL_PROFILE("MLEBTensorOp::compFlux()"); if ( !(loc==Location::FaceCenter || loc==Location::FaceCentroid) ) - amrex::Abort("MLEBTensorOp::compFlux() unknown location for fluxes."); + amrex::Abort("MLEBTensorOp::compFlux() unknown location for fluxes."); const int mglev = 0; const int ncomp = getNComp(); @@ -429,7 +524,7 @@ MLEBTensorOp::compFlux (int amrlev, const Array& fluxe Array& fluxmf = m_tauflux[amrlev][mglev]; Real bscalar = m_b_scalar; - compCrossTerms(amrlev, mglev, sol); + compCrossTerms(amrlev, mglev, sol, m_bndry_sol[amrlev].get()); MFItInfo mfi_info; if (Gpu::notInLaunchRegion()) mfi_info.EnableTiling().SetDynamic(true); @@ -515,104 +610,11 @@ MLEBTensorOp::compFlux (int amrlev, const Array& fluxe } void -MLEBTensorOp::compVelGrad (int amrlev, const Array& fluxes, - MultiFab& sol, Location loc) const +MLEBTensorOp::compVelGrad (int /*amrlev*/, + const Array& /*fluxes*/, + MultiFab& /*sol*/, Location /*loc*/) const { - BL_PROFILE("MLEBTensorOp::compVelGrad()"); - - if ( !(loc==Location::FaceCenter || loc==Location::FaceCentroid) ) - amrex::Abort("MLEBTensorOp::compVelGrad() unknown location for VelGradients."); - - const int mglev = 0; - - applyBCTensor(amrlev, mglev, sol, BCMode::Inhomogeneous, StateMode::Solution, m_bndry_sol[amrlev].get()); - - auto factory = dynamic_cast(m_factory[amrlev][mglev].get()); - const FabArray* flags = (factory) ? &(factory->getMultiEBCellFlagFab()) : nullptr; - - const Geometry& geom = m_geom[amrlev][mglev]; - const auto dxinv = geom.InvCellSizeArray(); - - const int dim_fluxes = AMREX_SPACEDIM*AMREX_SPACEDIM; - - MFItInfo mfi_info; - if (Gpu::notInLaunchRegion()) mfi_info.EnableTiling().SetDynamic(true); -#ifdef AMREX_USE_OMP -#pragma omp parallel if (Gpu::notInLaunchRegion()) -#endif - { - Array fluxfab_tmp; - for (MFIter mfi(sol, mfi_info); mfi.isValid(); ++mfi) - { - const Box& bx = mfi.tilebox(); - - auto fabtyp = (flags) ? 
(*flags)[mfi].getType(bx) : FabType::regular; - if (fabtyp == FabType::covered) continue; - - if (fabtyp == FabType::regular) - { - - Array4 const vfab = sol.const_array(mfi); - AMREX_D_TERM(Box const xbx = mfi.nodaltilebox(0);, - Box const ybx = mfi.nodaltilebox(1);, - Box const zbx = mfi.nodaltilebox(2);); - AMREX_D_TERM(fluxfab_tmp[0].resize(xbx,dim_fluxes);, - fluxfab_tmp[1].resize(ybx,dim_fluxes);, - fluxfab_tmp[2].resize(zbx,dim_fluxes);); - AMREX_D_TERM(Elixir fxeli = fluxfab_tmp[0].elixir();, - Elixir fyeli = fluxfab_tmp[1].elixir();, - Elixir fzeli = fluxfab_tmp[2].elixir();); - AMREX_D_TERM(Array4 const fxfab = fluxfab_tmp[0].array();, - Array4 const fyfab = fluxfab_tmp[1].array();, - Array4 const fzfab = fluxfab_tmp[2].array();); - AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM - ( xbx, txbx, - { - mltensor_vel_grads_fx(txbx,fxfab,vfab,dxinv); - } - , ybx, tybx, - { - mltensor_vel_grads_fy(tybx,fyfab,vfab,dxinv); - } - , zbx, tzbx, - { - mltensor_vel_grads_fz(tzbx,fzfab,vfab,dxinv); - } - ); - -// The derivatives are put in the array with the following order: -// component: 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 -// in 2D: dU/dx, dV/dx, dU/dy, dV/dy -// in 3D: dU/dx, dV/dx, dW/dx, dU/dy, dV/dy, dW/dy, dU/dz, dV/dz, dW/dz - - - for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { - const Box& nbx = mfi.nodaltilebox(idim); - Array4 dst = fluxes[idim]->array(mfi); - Array4 src = fluxfab_tmp[idim].const_array(); - AMREX_HOST_DEVICE_PARALLEL_FOR_4D (nbx, dim_fluxes, i, j, k, n, - { - dst(i,j,k,n) = src(i,j,k,n); - }); - } - - - } - else if ( loc==Location::FaceCenter ) - { - - amrex::Abort("compVelGrad not yet implemented for cut-cells "); - - } - else // loc==Location::FaceCentroid - { - - amrex::Abort("compVelGrad not yet implemented for cut-cells "); - - } - - } - } + amrex::Abort("compVelGrad not yet implemented for EB."); } } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp_bc.cpp b/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp_bc.cpp index c9c6eb232bb..98beecf01df 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp_bc.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp_bc.cpp @@ -13,11 +13,12 @@ MLEBTensorOp::applyBCTensor (int amrlev, int mglev, MultiFab& vel, const auto& bcondloc = *m_bcondloc[amrlev][mglev]; const auto& maskvals = m_maskvals[amrlev][mglev]; - FArrayBox foofab(Box::TheUnitBox(),3); - const auto& foo = foofab.array(); + Array4 foo; const auto dxinv = m_geom[amrlev][mglev].InvCellSizeArray(); const Box& domain = m_geom[amrlev][mglev].growPeriodicDomain(1); + const auto dlo = amrex::lbound(domain); + const auto dhi = amrex::ubound(domain); auto factory = dynamic_cast(m_factory[amrlev][mglev].get()); const FabArray* flags = (factory) ? 
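// The dummy boundary holder also changes here: instead of a one-cell
// FArrayBox ("foofab"), a default-constructed Array4 is passed when bndry is
// null. A default Array4 holds a null pointer and converts to false, which
// is what lets the kernels guard with `... == AMREX_LO_DIRICHLET && bvxlo`.
// Minimal shape of that idiom (View is a stand-in for amrex::Array4):

template <class T>
struct View {
    T* p = nullptr;
    explicit operator bool () const { return p != nullptr; }
};

double sample (View<double const> bv)
{
    return bv ? *bv.p : 0.0; // safe whether or not data was attached
}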
&(factory->getMultiEBCellFlagFab()) : nullptr; @@ -39,14 +40,13 @@ MLEBTensorOp::applyBCTensor (int amrlev, int mglev, MultiFab& vel, const auto & bdlv = bcondloc.bndryLocs(mfi); const auto & bdcv = bcondloc.bndryConds(mfi); - GpuArray bct; - GpuArray bcl; - for (OrientationIter face; face; ++face) { - Orientation ori = face(); - const int iface = ori; - for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { - bct[iface*AMREX_SPACEDIM+icomp] = bdcv[icomp][ori]; - bcl[iface*AMREX_SPACEDIM+icomp] = bdlv[icomp][ori]; + Array2D bct; + Array2D bcl; + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + for (OrientationIter face; face; ++face) { + Orientation ori = face(); + bct(ori,icomp) = bdcv[icomp][ori]; + bcl(ori,icomp) = bdlv[icomp][ori]; } } @@ -72,7 +72,7 @@ MLEBTensorOp::applyBCTensor (int amrlev, int mglev, MultiFab& vel, mxlo, mylo, mxhi, myhi, bvxlo, bvylo, bvxhi, bvyhi, bct, bcl, inhomog, imaxorder, - dxinv, domain); + dxinv, dlo, dhi); }); #else const auto& mzlo = maskvals[Orientation(2,Orientation::low )].array(mfi); @@ -83,14 +83,37 @@ MLEBTensorOp::applyBCTensor (int amrlev, int mglev, MultiFab& vel, const auto& bvzhi = (bndry != nullptr) ? (*bndry)[Orientation(2,Orientation::high)].array(mfi) : foo; - AMREX_HOST_DEVICE_FOR_1D ( 12, iedge, +#ifdef AMREX_USE_GPU + if (Gpu::inLaunchRegion()) { + amrex::launch(12, 64, Gpu::gpuStream(), +#ifdef AMREX_USE_DPCPP + [=] AMREX_GPU_DEVICE (sycl::nd_item<1> const& item) + { + int bid = item.get_group_linear_id(); + int tid = item.get_local_linear_id(); + int bdim = item.get_local_range(0); +#else + [=] AMREX_GPU_DEVICE () + { + int bid = blockIdx.x; + int tid = threadIdx.x; + int bdim = blockDim.x; +#endif + mltensor_fill_edges(bid, tid, bdim, vbx, velfab, + mxlo, mylo, mzlo, mxhi, myhi, mzhi, + bvxlo, bvylo, bvzlo, bvxhi, bvyhi, bvzhi, + bct, bcl, inhomog, imaxorder, + dxinv, dlo, dhi); + }); + } else +#endif { - mltensor_fill_edges(iedge, vbx, velfab, + mltensor_fill_edges(vbx, velfab, mxlo, mylo, mzlo, mxhi, myhi, mzhi, bvxlo, bvylo, bvzlo, bvxhi, bvyhi, bvzhi, bct, bcl, inhomog, imaxorder, - dxinv, domain); - }); + dxinv, dlo, dhi); + } AMREX_HOST_DEVICE_FOR_1D ( 8, icorner, { @@ -98,13 +121,12 @@ MLEBTensorOp::applyBCTensor (int amrlev, int mglev, MultiFab& vel, mxlo, mylo, mzlo, mxhi, myhi, mzhi, bvxlo, bvylo, bvzlo, bvxhi, bvyhi, bvzhi, bct, bcl, inhomog, imaxorder, - dxinv, domain); + dxinv, dlo, dhi); }); + #endif } } - - // Notet that it is incorrect to call EnforcePeriodicity on vel. } } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBTensor_2D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLEBTensor_2D_K.H index 165497d1a20..d93ea3a5d1a 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBTensor_2D_K.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBTensor_2D_K.H @@ -6,10 +6,95 @@ namespace amrex { -namespace { - AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - Real mlebtensor_weight (int d) { - return (d==2) ? 0.5 : ((d==1) ? 
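// The 3D edge fill above switches from one GPU thread per edge to one block
// per edge: amrex::launch(12, 64, ...) starts 12 blocks (a box has 12 edges)
// of 64 threads, and mltensor_fill_edges receives (bid, tid, bdim) so the
// threads of block bid can cooperatively cover that edge. Presumed shape of
// the decomposition (illustrative, not the AMReX kernel):

void fill_edge_blocked (int bid, int tid, int bdim, int edge_len)
{
    // block bid owns edge bid; threads stride along its cells
    for (int m = tid; m < edge_len; m += bdim) {
        // fill ghost cell m of edge bid ...
    }
}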
1.0 : 0.0); +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlebtensor_cross_terms_fx (Box const& box, Array4 const& fx, + Array4 const& vel, + Array4 const& etax, + Array4 const& kapx, + Array4 const& apx, + Array4 const& flag, + GpuArray const& dxinv) noexcept +{ + const Real dyi = dxinv[1]; + const auto lo = amrex::lbound(box); + const auto hi = amrex::ubound(box); + constexpr Real twoThirds = 2./3.; + + int k = 0; + for (int j = lo.y; j <= hi.y; ++j) { + AMREX_PRAGMA_SIMD + for (int i = lo.x; i <= hi.x; ++i) { + if (apx(i,j,0) == 0.0) + { + fx(i,j,0,0) = 0.0; + fx(i,j,0,1) = 0.0; + } + else + { + int jhip = j + flag(i ,j,0).isConnected(0, 1,0); + int jhim = j - flag(i ,j,0).isConnected(0,-1,0); + int jlop = j + flag(i-1,j,0).isConnected(0, 1,0); + int jlom = j - flag(i-1,j,0).isConnected(0,-1,0); + Real whi = mlebtensor_weight(jhip-jhim); + Real wlo = mlebtensor_weight(jlop-jlom); + Real dudy = mlebtensor_dy_on_xface(i,j,k,0,vel,dyi, + whi,wlo,jhip,jhim,jlop,jlom); + Real dvdy = mlebtensor_dy_on_xface(i,j,k,1,vel,dyi, + whi,wlo,jhip,jhim,jlop,jlom); + Real divu = dvdy; + Real xif = kapx(i,j,0); + Real mun = Real(0.75)*(etax(i,j,0,0)-xif);// restore the original eta + Real mut = etax(i,j,0,1); + fx(i,j,0,0) = -mun*(-twoThirds*divu) - xif*divu; + fx(i,j,0,1) = -mut*dudy; + } + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlebtensor_cross_terms_fy (Box const& box, Array4 const& fy, + Array4 const& vel, + Array4 const& etay, + Array4 const& kapy, + Array4 const& apy, + Array4 const& flag, + GpuArray const& dxinv) noexcept +{ + const Real dxi = dxinv[0]; + const auto lo = amrex::lbound(box); + const auto hi = amrex::ubound(box); + constexpr Real twoThirds = 2./3.; + + int k = 0; + for (int j = lo.y; j <= hi.y; ++j) { + AMREX_PRAGMA_SIMD + for (int i = lo.x; i <= hi.x; ++i) { + if (apy(i,j,0) == 0.0) + { + fy(i,j,0,0) = 0.0; + fy(i,j,0,1) = 0.0; + } + else + { + int ihip = i + flag(i,j ,0).isConnected( 1,0,0); + int ihim = i - flag(i,j ,0).isConnected(-1,0,0); + int ilop = i + flag(i,j-1,0).isConnected( 1,0,0); + int ilom = i - flag(i,j-1,0).isConnected(-1,0,0); + Real whi = mlebtensor_weight(ihip-ihim); + Real wlo = mlebtensor_weight(ilop-ilom); + Real dudx = mlebtensor_dx_on_yface(i,j,k,0,vel,dxi, + whi,wlo,ihip,ihim,ilop,ilom); + Real dvdx = mlebtensor_dx_on_yface(i,j,k,1,vel,dxi, + whi,wlo,ihip,ihim,ilop,ilom); + Real divu = dudx; + Real xif = kapy(i,j,0); + Real mun = Real(0.75)*(etay(i,j,0,1)-xif);// restore the original eta + Real mut = etay(i,j,0,0); + fy(i,j,0,0) = -mut*dvdx; + fy(i,j,0,1) = -mun*(-twoThirds*divu) - xif*divu; + } + } } } @@ -20,13 +105,20 @@ void mlebtensor_cross_terms_fx (Box const& box, Array4 const& fx, Array4 const& kapx, Array4 const& apx, Array4 const& flag, - GpuArray const& dxinv) noexcept + GpuArray const& dxinv, + Array4 const& bvxlo, + Array4 const& bvxhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept { const Real dyi = dxinv[1]; const auto lo = amrex::lbound(box); const auto hi = amrex::ubound(box); constexpr Real twoThirds = 2./3.; + int k = 0; for (int j = lo.y; j <= hi.y; ++j) { AMREX_PRAGMA_SIMD for (int i = lo.x; i <= hi.x; ++i) { @@ -43,13 +135,15 @@ void mlebtensor_cross_terms_fx (Box const& box, Array4 const& fx, int jlom = j - flag(i-1,j,0).isConnected(0,-1,0); Real whi = mlebtensor_weight(jhip-jhim); Real wlo = mlebtensor_weight(jlop-jlom); - Real dudy = (0.5*dyi) * ((vel(i ,jhip,0,0)-vel(i ,jhim,0,0))*whi - +(vel(i-1,jlop,0,0)-vel(i-1,jlom,0,0))*wlo); - Real dvdy = (0.5*dyi) * ((vel(i ,jhip,0,1)-vel(i 
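// mlebtensor_weight, used throughout these kernels, keys off the stencil
// span d = jhip - jhim (or ihip - ihim): the isConnected calls make d equal
// to 2 when both transverse neighbors are reachable, 1 when the EB cuts one
// off, and 0 when the face is isolated:
//
//   d   neighbors connected   difference used     weight
//   2   both                  central (span 2h)   0.5
//   1   one                   one-sided (span h)  1.0
//   0   none                  none                0.0
//
// so weight * dyi * (vel(jhip) - vel(jhim)) is a consistent first derivative
// regardless of the local cut-cell connectivity.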
,jhim,0,1))*whi - +(vel(i-1,jlop,0,1)-vel(i-1,jlom,0,1))*wlo); + Real dudy = mlebtensor_dy_on_xface(i,j,k,0,vel,dyi, + bvxlo,bvxhi,bct,dlo,dhi, + whi,wlo,jhip,jhim,jlop,jlom); + Real dvdy = mlebtensor_dy_on_xface(i,j,k,1,vel,dyi, + bvxlo,bvxhi,bct,dlo,dhi, + whi,wlo,jhip,jhim,jlop,jlom); Real divu = dvdy; Real xif = kapx(i,j,0); - Real mun = 0.75*(etax(i,j,0,0)-xif); // restore the original eta + Real mun = Real(0.75)*(etax(i,j,0,0)-xif);// restore the original eta Real mut = etax(i,j,0,1); fx(i,j,0,0) = -mun*(-twoThirds*divu) - xif*divu; fx(i,j,0,1) = -mut*dudy; @@ -65,13 +159,20 @@ void mlebtensor_cross_terms_fy (Box const& box, Array4 const& fy, Array4 const& kapy, Array4 const& apy, Array4 const& flag, - GpuArray const& dxinv) noexcept + GpuArray const& dxinv, + Array4 const& bvylo, + Array4 const& bvyhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept { const Real dxi = dxinv[0]; const auto lo = amrex::lbound(box); const auto hi = amrex::ubound(box); constexpr Real twoThirds = 2./3.; + int k = 0; for (int j = lo.y; j <= hi.y; ++j) { AMREX_PRAGMA_SIMD for (int i = lo.x; i <= hi.x; ++i) { @@ -88,15 +189,16 @@ void mlebtensor_cross_terms_fy (Box const& box, Array4 const& fy, int ilom = i - flag(i,j-1,0).isConnected(-1,0,0); Real whi = mlebtensor_weight(ihip-ihim); Real wlo = mlebtensor_weight(ilop-ilom); - Real dudx = (0.5*dxi) * ((vel(ihip,j ,0,0)-vel(ihim,j ,0,0))*whi - +(vel(ilop,j-1,0,0)-vel(ilom,j-1,0,0))*wlo); - Real dvdx = (0.5*dxi) * ((vel(ihip,j ,0,1)-vel(ihim,j ,0,1))*whi - +(vel(ilop,j-1,0,1)-vel(ilom,j-1,0,1))*wlo); - + Real dudx = mlebtensor_dx_on_yface(i,j,k,0,vel,dxi, + bvylo,bvyhi,bct,dlo,dhi, + whi,wlo,ihip,ihim,ilop,ilom); + Real dvdx = mlebtensor_dx_on_yface(i,j,k,1,vel,dxi, + bvylo,bvyhi,bct,dlo,dhi, + whi,wlo,ihip,ihim,ilop,ilom); Real divu = dudx; Real xif = kapy(i,j,0); - Real mun = 0.75*(etay(i,j,0,1)-xif); // restore the original eta - Real mut = etay(i,j,0,0); + Real mun = Real(0.75)*(etay(i,j,0,1)-xif);// restore the original eta + Real mut = etay(i,j,0,0); fy(i,j,0,0) = -mut*dvdx; fy(i,j,0,1) = -mun*(-twoThirds*divu) - xif*divu; } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBTensor_3D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLEBTensor_3D_K.H index 3c26566e7ac..2651addee2c 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBTensor_3D_K.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBTensor_3D_K.H @@ -6,11 +6,44 @@ namespace amrex { -namespace { - AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - Real mlebtensor_weight (int d) { - return (d==2) ? 0.5 : ((d==1) ? 
1.0 : 0.0); - } +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mlebtensor_dz_on_xface (int i, int j, int, int n, + Array4 const& vel, Real dzi, + Real whi, Real wlo, + int khip, int khim, int klop, int klom) noexcept +{ + return Real(0.5)*dzi * ((vel(i ,j,khip,n)-vel(i ,j,khim,n))*whi + + (vel(i-1,j,klop,n)-vel(i-1,j,klom,n))*wlo); +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mlebtensor_dz_on_yface (int i, int j, int, int n, + Array4 const& vel, Real dzi, + Real whi, Real wlo, + int khip, int khim, int klop, int klom) noexcept +{ + return Real(0.5)*dzi * ((vel(i,j ,khip,n)-vel(i,j ,khim,n))*whi + + (vel(i,j-1,klop,n)-vel(i,j-1,klom,n))*wlo); +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mlebtensor_dx_on_zface (int, int j, int k, int n, + Array4 const& vel, Real dxi, + Real whi, Real wlo, + int ihip, int ihim, int ilop, int ilom) noexcept +{ + return Real(0.5)*dxi * ((vel(ihip,j,k ,n)-vel(ihim,j,k ,n))*whi + + (vel(ilop,j,k-1,n)-vel(ilom,j,k-1,n))*wlo); +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mlebtensor_dy_on_zface (int i, int, int k, int n, + Array4 const& vel, Real dyi, + Real whi, Real wlo, + int jhip, int jhim, int jlop, int jlom) noexcept +{ + return Real(0.5)*dyi * ((vel(i,jhip,k ,n)-vel(i,jhim,k ,n))*whi + + (vel(i,jlop,k-1,n)-vel(i,jlom,k-1,n))*wlo); } AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE @@ -46,26 +79,24 @@ void mlebtensor_cross_terms_fx (Box const& box, Array4 const& fx, int jlom = j - flag(i-1,j,k).isConnected(0,-1,0); Real whi = mlebtensor_weight(jhip-jhim); Real wlo = mlebtensor_weight(jlop-jlom); - Real dudy = (0.5*dyi) * ((vel(i ,jhip,k,0)-vel(i ,jhim,k,0))*whi - +(vel(i-1,jlop,k,0)-vel(i-1,jlom,k,0))*wlo); - Real dvdy = (0.5*dyi) * ((vel(i ,jhip,k,1)-vel(i ,jhim,k,1))*whi - +(vel(i-1,jlop,k,1)-vel(i-1,jlom,k,1))*wlo); - + Real dudy = mlebtensor_dy_on_xface(i,j,k,0,vel,dyi, + whi,wlo,jhip,jhim,jlop,jlom); + Real dvdy = mlebtensor_dy_on_xface(i,j,k,1,vel,dyi, + whi,wlo,jhip,jhim,jlop,jlom); int khip = k + flag(i ,j,k).isConnected(0,0, 1); int khim = k - flag(i ,j,k).isConnected(0,0,-1); int klop = k + flag(i-1,j,k).isConnected(0,0, 1); int klom = k - flag(i-1,j,k).isConnected(0,0,-1); whi = mlebtensor_weight(khip-khim); wlo = mlebtensor_weight(klop-klom); - Real dudz = (0.5*dzi) * ((vel(i ,j,khip,0)-vel(i ,j,khim,0))*whi - +(vel(i-1,j,klop,0)-vel(i-1,j,klom,0))*wlo); - Real dwdz = (0.5*dzi) * ((vel(i ,j,khip,2)-vel(i ,j,khim,2))*whi - +(vel(i-1,j,klop,2)-vel(i-1,j,klom,2))*wlo); - + Real dudz = mlebtensor_dz_on_xface(i,j,k,0,vel,dzi, + whi,wlo,khip,khim,klop,klom); + Real dwdz = mlebtensor_dz_on_xface(i,j,k,2,vel,dzi, + whi,wlo,khip,khim,klop,klom); Real divu = dvdy + dwdz; Real xif = kapx(i,j,k); - Real mun = 0.75*(etax(i,j,k,0)-xif); // restore the original eta - Real mut = etax(i,j,k,1); + Real mun = Real(0.75)*(etax(i,j,k,0)-xif);// restore the original eta + Real mut = etax(i,j,k,1); fx(i,j,k,0) = -mun*(-twoThirds*divu) - xif*divu; fx(i,j,k,1) = -mut*dudy; fx(i,j,k,2) = -mut*dudz; @@ -108,26 +139,24 @@ void mlebtensor_cross_terms_fy (Box const& box, Array4 const& fy, int ilom = i - flag(i,j-1,k).isConnected(-1,0,0); Real whi = mlebtensor_weight(ihip-ihim); Real wlo = mlebtensor_weight(ilop-ilom); - Real dudx = (0.5*dxi) * ((vel(ihip,j ,k,0)-vel(ihim,j ,k,0))*whi - +(vel(ilop,j-1,k,0)-vel(ilom,j-1,k,0))*wlo); - Real dvdx = (0.5*dxi) * ((vel(ihip,j ,k,1)-vel(ihim,j ,k,1))*whi - +(vel(ilop,j-1,k,1)-vel(ilom,j-1,k,1))*wlo); - + Real dudx = mlebtensor_dx_on_yface(i,j,k,0,vel,dxi, + whi,wlo,ihip,ihim,ilop,ilom); + Real dvdx = 
mlebtensor_dx_on_yface(i,j,k,1,vel,dxi, + whi,wlo,ihip,ihim,ilop,ilom); int khip = k + flag(i,j ,k).isConnected(0,0, 1); int khim = k - flag(i,j ,k).isConnected(0,0,-1); int klop = k + flag(i,j-1,k).isConnected(0,0, 1); int klom = k - flag(i,j-1,k).isConnected(0,0,-1); whi = mlebtensor_weight(khip-khim); wlo = mlebtensor_weight(klop-klom); - Real dvdz = (0.5*dzi) * ((vel(i,j ,khip,1)-vel(i,j ,khim,1))*whi - +(vel(i,j-1,klop,1)-vel(i,j-1,klom,1))*wlo); - Real dwdz = (0.5*dzi) * ((vel(i,j ,khip,2)-vel(i,j ,khim,2))*whi - +(vel(i,j-1,klop,2)-vel(i,j-1,klom,2))*wlo); - + Real dvdz = mlebtensor_dz_on_yface(i,j,k,1,vel,dzi, + whi,wlo,khip,khim,klop,klom); + Real dwdz = mlebtensor_dz_on_yface(i,j,k,2,vel,dzi, + whi,wlo,khip,khim,klop,klom); Real divu = dudx + dwdz; Real xif = kapy(i,j,k); - Real mun = 0.75*(etay(i,j,k,1)-xif); // restore the original eta - Real mut = etay(i,j,k,0); + Real mun = Real(0.75)*(etay(i,j,k,1)-xif);// restore the original eta + Real mut = etay(i,j,k,0); fy(i,j,k,0) = -mut*dvdx; fy(i,j,k,1) = -mun*(-twoThirds*divu) - xif*divu; fy(i,j,k,2) = -mut*dvdz; @@ -170,27 +199,457 @@ void mlebtensor_cross_terms_fz (Box const& box, Array4 const& fz, int ilom = i - flag(i,j,k-1).isConnected(-1,0,0); Real whi = mlebtensor_weight(ihip-ihim); Real wlo = mlebtensor_weight(ilop-ilom); + Real dudx = mlebtensor_dx_on_zface(i,j,k,0,vel,dxi, + whi,wlo,ihip,ihim,ilop,ilom); + Real dwdx = mlebtensor_dx_on_zface(i,j,k,2,vel,dxi, + whi,wlo,ihip,ihim,ilop,ilom); + int jhip = j + flag(i,j,k ).isConnected(0, 1,0); + int jhim = j - flag(i,j,k ).isConnected(0,-1,0); + int jlop = j + flag(i,j,k-1).isConnected(0, 1,0); + int jlom = j - flag(i,j,k-1).isConnected(0,-1,0); + whi = mlebtensor_weight(jhip-jhim); + wlo = mlebtensor_weight(jlop-jlom); + Real dvdy = mlebtensor_dy_on_zface(i,j,k,1,vel,dyi, + whi,wlo,jhip,jhim,jlop,jlom); + Real dwdy = mlebtensor_dy_on_zface(i,j,k,2,vel,dyi, + whi,wlo,jhip,jhim,jlop,jlom); + Real divu = dudx + dvdy; + Real xif = kapz(i,j,k); + Real mun = Real(0.75)*(etaz(i,j,k,2)-xif);// restore the original eta + Real mut = etaz(i,j,k,0); + + fz(i,j,k,0) = -mut*dwdx; + fz(i,j,k,1) = -mut*dwdy; + fz(i,j,k,2) = -mun*(-twoThirds*divu) - xif*divu; + } + } + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mlebtensor_dz_on_xface (int i, int j, int k, int n, + Array4 const& vel, Real dzi, + Array4 const& bvxlo, + Array4 const& bvxhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi, + Real whi, Real wlo, + int khip, int khim, int klop, int klom) noexcept +{ + Real ddz; + if (i == dlo.x) { + if (bct(Orientation::xlo(),n) == AMREX_LO_DIRICHLET && bvxlo) { + if (k == dlo.z) { + ddz = (bvxlo(i-1,j,k ,n) * Real(-1.5) + + bvxlo(i-1,j,k+1,n) * Real(2.) + + bvxlo(i-1,j,k+2,n) * Real(-0.5)) * dzi; + } else if (k == dhi.z) { + ddz = -(bvxlo(i-1,j,k ,n) * Real(-1.5) + + bvxlo(i-1,j,k-1,n) * Real(2.) + + bvxlo(i-1,j,k-2,n) * Real(-0.5)) * dzi; + } else { + ddz = whi*dzi*(bvxlo(i-1,j,khip,n)-bvxlo(i-1,j,khim,n)); + } + } else if (bct(Orientation::xlo(),n) == AMREX_LO_NEUMANN) { + ddz = whi*dzi*(vel(i,j,khip,n)-vel(i,j,khim,n)); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddz = Real(0.); + } + } else if (i == dhi.x+1) { + if (bct(Orientation::xhi(),n) == AMREX_LO_DIRICHLET && bvxhi) { + if (k == dlo.z) { + ddz = (bvxhi(i,j,k ,n) * Real(-1.5) + + bvxhi(i,j,k+1,n) * Real(2.) + + bvxhi(i,j,k+2,n) * Real(-0.5)) * dzi; + } else if (k == dhi.z) { + ddz = -(bvxhi(i,j,k ,n) * Real(-1.5) + + bvxhi(i,j,k-1,n) * Real(2.) 
+ + bvxhi(i,j,k-2,n) * Real(-0.5)) * dzi; + } else { + ddz = wlo*dzi*(bvxhi(i,j,klop,n)-bvxhi(i,j,klom,n)); + } + } else if (bct(Orientation::xhi(),n) == AMREX_LO_NEUMANN) { + ddz = wlo*dzi*(vel(i-1,j,klop,n)-vel(i-1,j,klom,n)); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddz = Real(0.); + } + } else { + ddz = mlebtensor_dz_on_xface(i,j,k,n,vel,dzi,whi,wlo,khip,khim,klop,klom); + } + return ddz; +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mlebtensor_dz_on_yface (int i, int j, int k, int n, + Array4 const& vel, Real dzi, + Array4 const& bvylo, + Array4 const& bvyhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi, + Real whi, Real wlo, + int khip, int khim, int klop, int klom) noexcept +{ + Real ddz; + if (j == dlo.y) { + if (bct(Orientation::ylo(),n) == AMREX_LO_DIRICHLET && bvylo) { + if (k == dlo.z) { + ddz = (bvylo(i,j-1,k ,n) * Real(-1.5) + + bvylo(i,j-1,k+1,n) * Real(2.) + + bvylo(i,j-1,k+2,n) * Real(-0.5)) * dzi; + } else if (k == dhi.z) { + ddz = -(bvylo(i,j-1,k ,n) * Real(-1.5) + + bvylo(i,j-1,k-1,n) * Real(2.) + + bvylo(i,j-1,k-2,n) * Real(-0.5)) * dzi; + } else { + ddz = whi*dzi*(bvylo(i,j-1,khip,n)-bvylo(i,j-1,khim,n)); + } + } else if (bct(Orientation::ylo(),n) == AMREX_LO_NEUMANN) { + ddz = whi*dzi*(vel(i,j,khip,n)-vel(i,j,khim,n)); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddz = Real(0.); + } + } else if (j == dhi.y+1) { + if (bct(Orientation::yhi(),n) == AMREX_LO_DIRICHLET && bvyhi) { + if (k == dlo.z) { + ddz = (bvyhi(i,j,k ,n) * Real(-1.5) + + bvyhi(i,j,k+1,n) * Real(2.) + + bvyhi(i,j,k+2,n) * Real(-0.5)) * dzi; + } else if (k == dhi.z) { + ddz = -(bvyhi(i,j,k ,n) * Real(-1.5) + + bvyhi(i,j,k-1,n) * Real(2.) + + bvyhi(i,j,k-2,n) * Real(-0.5)) * dzi; + } else { + ddz = wlo*dzi*(bvyhi(i,j,klop,n)-bvyhi(i,j,klom,n)); + } + } else if (bct(Orientation::yhi(),n) == AMREX_LO_NEUMANN) { + ddz = wlo*dzi*(vel(i,j-1,klop,n)-vel(i,j-1,klom,n)); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddz = Real(0.); + } + } else { + ddz = mlebtensor_dz_on_yface(i,j,k,n,vel,dzi,whi,wlo,khip,khim,klop,klom); + } + return ddz; +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mlebtensor_dx_on_zface (int i, int j, int k, int n, + Array4 const& vel, Real dxi, + Array4 const& bvzlo, + Array4 const& bvzhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi, + Real whi, Real wlo, + int ihip, int ihim, int ilop, int ilom) noexcept +{ + Real ddx; + if (k == dlo.z) { + if (bct(Orientation::zlo(),n) == AMREX_LO_DIRICHLET && bvzlo) { + if (i == dlo.x) { + ddx = (bvzlo(i ,j,k-1,n) * Real(-1.5) + + bvzlo(i+1,j,k-1,n) * Real(2.) + + bvzlo(i+2,j,k-1,n) * Real(-0.5)) * dxi; + } else if (i == dhi.x) { + ddx = -(bvzlo(i ,j,k-1,n) * Real(-1.5) + + bvzlo(i-1,j,k-1,n) * Real(2.) + + bvzlo(i-2,j,k-1,n) * Real(-0.5)) * dxi; + } else { + ddx = whi*dxi*(bvzlo(ihip,j,k-1,n)-bvzlo(ihim,j,k-1,n)); + } + } else if (bct(Orientation::zlo(),n) == AMREX_LO_NEUMANN) { + ddx = whi*dxi*(vel(ihip,j,k,n)-vel(ihim,j,k,n)); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddx = Real(0.); + } + } else if (k == dhi.z+1) { + if (bct(Orientation::zhi(),n) == AMREX_LO_DIRICHLET && bvzhi) { + if (i == dlo.x) { + ddx = (bvzhi(i ,j,k,n) * Real(-1.5) + + bvzhi(i+1,j,k,n) * Real(2.) + + bvzhi(i+2,j,k,n) * Real(-0.5)) * dxi; + } else if (i == dhi.x) { + ddx = -(bvzhi(i ,j,k,n) * Real(-1.5) + + bvzhi(i-1,j,k,n) * Real(2.) 
+ + bvzhi(i-2,j,k,n) * Real(-0.5)) * dxi; + } else { + ddx = wlo*dxi*(bvzhi(ilop,j,k,n)-bvzhi(ilom,j,k,n)); + } + } else if (bct(Orientation::zhi(),n) == AMREX_LO_NEUMANN) { + ddx = wlo*dxi*(vel(ilop,j,k-1,n)-vel(ilom,j,k-1,n)); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddx = Real(0.); + } + } else { + ddx = mlebtensor_dx_on_zface(i,j,k,n,vel,dxi,whi,wlo,ihip,ihim,ilop,ilom); + + } + return ddx; +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mlebtensor_dy_on_zface (int i, int j, int k, int n, + Array4 const& vel, Real dyi, + Array4 const& bvzlo, + Array4 const& bvzhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi, + Real whi, Real wlo, + int jhip, int jhim, int jlop, int jlom) noexcept +{ + Real ddy; + if (k == dlo.z) { + if (bct(Orientation::zlo(),n) == AMREX_LO_DIRICHLET && bvzlo) { + if (j == dlo.y) { + ddy = (bvzlo(i,j ,k-1,n) * Real(-1.5) + + bvzlo(i,j+1,k-1,n) * Real(2.) + + bvzlo(i,j+2,k-1,n) * Real(-0.5)) * dyi; + } else if (j == dhi.y) { + ddy = -(bvzlo(i,j ,k-1,n) * Real(-1.5) + + bvzlo(i,j-1,k-1,n) * Real(2.) + + bvzlo(i,j-2,k-1,n) * Real(-0.5)) * dyi; + } else { + ddy = whi*dyi*(bvzlo(i,jhip,k-1,n)-bvzlo(i,jhim,k-1,n)); + } + } else if (bct(Orientation::zlo(),n) == AMREX_LO_NEUMANN) { + ddy = whi*dyi*(vel(i,jhip,k,n)-vel(i,jhim,k,n)); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddy = Real(0.); + } + } else if (k == dhi.z+1) { + if (bct(Orientation::zhi(),n) == AMREX_LO_DIRICHLET && bvzhi) { + if (j == dlo.y) { + ddy = (bvzhi(i,j ,k,n) * Real(-1.5) + + bvzhi(i,j+1,k,n) * Real(2.) + + bvzhi(i,j+2,k,n) * Real(-0.5)) * dyi; + } else if (j == dhi.y) { + ddy = -(bvzhi(i,j ,k,n) * Real(-1.5) + + bvzhi(i,j-1,k,n) * Real(2.) + + bvzhi(i,j-2,k,n) * Real(-0.5)) * dyi; + } else { + ddy = wlo*dyi*(bvzhi(i,jlop,k,n)-bvzhi(i,jlom,k,n)); + } + } else if (bct(Orientation::zhi(),n) == AMREX_LO_NEUMANN) { + ddy = wlo*dyi*(vel(i,jlop,k-1,n)-vel(i,jlom,k-1,n)); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddy = Real(0.); + } + } else { + ddy = mlebtensor_dy_on_zface(i,j,k,n,vel,dyi,whi,wlo,jhip,jhim,jlop,jlom); + } + return ddy; +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlebtensor_cross_terms_fx (Box const& box, Array4 const& fx, + Array4 const& vel, + Array4 const& etax, + Array4 const& kapx, + Array4 const& apx, + Array4 const& flag, + GpuArray const& dxinv, + Array4 const& bvxlo, + Array4 const& bvxhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept + +{ + const Real dyi = dxinv[1]; + const Real dzi = dxinv[2]; + const auto lo = amrex::lbound(box); + const auto hi = amrex::ubound(box); + constexpr Real twoThirds = 2./3.; + + for (int k = lo.z; k <= hi.z; ++k) { + for (int j = lo.y; j <= hi.y; ++j) { + AMREX_PRAGMA_SIMD + for (int i = lo.x; i <= hi.x; ++i) { + if (apx(i,j,k) == 0.0) + { + fx(i,j,k,0) = 0.0; + fx(i,j,k,1) = 0.0; + fx(i,j,k,2) = 0.0; + } + else + { + int jhip = j + flag(i ,j,k).isConnected(0, 1,0); + int jhim = j - flag(i ,j,k).isConnected(0,-1,0); + int jlop = j + flag(i-1,j,k).isConnected(0, 1,0); + int jlom = j - flag(i-1,j,k).isConnected(0,-1,0); + Real whi = mlebtensor_weight(jhip-jhim); + Real wlo = mlebtensor_weight(jlop-jlom); + Real dudy = mlebtensor_dy_on_xface(i,j,k,0,vel,dyi, + bvxlo,bvxhi,bct,dlo,dhi, + whi,wlo,jhip,jhim,jlop,jlom); + Real dvdy = mlebtensor_dy_on_xface(i,j,k,1,vel,dyi, + bvxlo,bvxhi,bct,dlo,dhi, + whi,wlo,jhip,jhim,jlop,jlom); + int khip = k + flag(i ,j,k).isConnected(0,0, 1); + int khim = k - flag(i ,j,k).isConnected(0,0,-1); + int klop = k + 
flag(i-1,j,k).isConnected(0,0, 1); + int klom = k - flag(i-1,j,k).isConnected(0,0,-1); + whi = mlebtensor_weight(khip-khim); + wlo = mlebtensor_weight(klop-klom); + Real dudz = mlebtensor_dz_on_xface(i,j,k,0,vel,dzi, + bvxlo,bvxhi,bct,dlo,dhi, + whi,wlo,khip,khim,klop,klom); + Real dwdz = mlebtensor_dz_on_xface(i,j,k,2,vel,dzi, + bvxlo,bvxhi,bct,dlo,dhi, + whi,wlo,khip,khim,klop,klom); + Real divu = dvdy + dwdz; + Real xif = kapx(i,j,k); + Real mun = Real(0.75)*(etax(i,j,k,0)-xif);// restore the original eta + Real mut = etax(i,j,k,1); + fx(i,j,k,0) = -mun*(-twoThirds*divu) - xif*divu; + fx(i,j,k,1) = -mut*dudy; + fx(i,j,k,2) = -mut*dudz; + } + } + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlebtensor_cross_terms_fy (Box const& box, Array4 const& fy, + Array4 const& vel, + Array4 const& etay, + Array4 const& kapy, + Array4 const& apy, + Array4 const& flag, + GpuArray const& dxinv, + Array4 const& bvylo, + Array4 const& bvyhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept +{ + const Real dxi = dxinv[0]; + const Real dzi = dxinv[2]; + const auto lo = amrex::lbound(box); + const auto hi = amrex::ubound(box); + constexpr Real twoThirds = 2./3.; + + for (int k = lo.z; k <= hi.z; ++k) { + for (int j = lo.y; j <= hi.y; ++j) { + AMREX_PRAGMA_SIMD + for (int i = lo.x; i <= hi.x; ++i) { + if (apy(i,j,k) == 0.0) + { + fy(i,j,k,0) = 0.0; + fy(i,j,k,1) = 0.0; + fy(i,j,k,2) = 0.0; + } + else + { + int ihip = i + flag(i,j ,k).isConnected( 1,0,0); + int ihim = i - flag(i,j ,k).isConnected(-1,0,0); + int ilop = i + flag(i,j-1,k).isConnected( 1,0,0); + int ilom = i - flag(i,j-1,k).isConnected(-1,0,0); + Real whi = mlebtensor_weight(ihip-ihim); + Real wlo = mlebtensor_weight(ilop-ilom); + Real dudx = mlebtensor_dx_on_yface(i,j,k,0,vel,dxi, + bvylo,bvyhi,bct,dlo,dhi, + whi,wlo,ihip,ihim,ilop,ilom); + Real dvdx = mlebtensor_dx_on_yface(i,j,k,1,vel,dxi, + bvylo,bvyhi,bct,dlo,dhi, + whi,wlo,ihip,ihim,ilop,ilom); + int khip = k + flag(i,j ,k).isConnected(0,0, 1); + int khim = k - flag(i,j ,k).isConnected(0,0,-1); + int klop = k + flag(i,j-1,k).isConnected(0,0, 1); + int klom = k - flag(i,j-1,k).isConnected(0,0,-1); + whi = mlebtensor_weight(khip-khim); + wlo = mlebtensor_weight(klop-klom); + Real dvdz = mlebtensor_dz_on_yface(i,j,k,1,vel,dzi, + bvylo,bvyhi,bct,dlo,dhi, + whi,wlo,khip,khim,klop,klom); + Real dwdz = mlebtensor_dz_on_yface(i,j,k,2,vel,dzi, + bvylo,bvyhi,bct,dlo,dhi, + whi,wlo,khip,khim,klop,klom); + Real divu = dudx + dwdz; + Real xif = kapy(i,j,k); + Real mun = Real(0.75)*(etay(i,j,k,1)-xif);// restore the original eta + Real mut = etay(i,j,k,0); + fy(i,j,k,0) = -mut*dvdx; + fy(i,j,k,1) = -mun*(-twoThirds*divu) - xif*divu; + fy(i,j,k,2) = -mut*dvdz; + } + } + } + } +} - Real dudx = (0.5*dxi) * ((vel(ihip,j,k ,0)-vel(ihim,j,k ,0))*whi - +(vel(ilop,j,k-1,0)-vel(ilom,j,k-1,0))*wlo); - Real dwdx = (0.5*dxi) * ((vel(ihip,j,k ,2)-vel(ihim,j,k ,2))*whi - +(vel(ilop,j,k-1,2)-vel(ilom,j,k-1,2))*wlo); +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlebtensor_cross_terms_fz (Box const& box, Array4 const& fz, + Array4 const& vel, + Array4 const& etaz, + Array4 const& kapz, + Array4 const& apz, + Array4 const& flag, + GpuArray const& dxinv, + Array4 const& bvzlo, + Array4 const& bvzhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept +{ + const Real dxi = dxinv[0]; + const Real dyi = dxinv[1]; + const auto lo = amrex::lbound(box); + const auto hi = amrex::ubound(box); + constexpr Real twoThirds = 2./3.; + for (int k = lo.z; k <= hi.z; ++k) { + for (int j = 
lo.y; j <= hi.y; ++j) { + AMREX_PRAGMA_SIMD + for (int i = lo.x; i <= hi.x; ++i) { + if (apz(i,j,k) == 0.0) + { + fz(i,j,k,0) = 0.0; + fz(i,j,k,1) = 0.0; + fz(i,j,k,2) = 0.0; + } + else + { + int ihip = i + flag(i,j,k ).isConnected( 1,0,0); + int ihim = i - flag(i,j,k ).isConnected(-1,0,0); + int ilop = i + flag(i,j,k-1).isConnected( 1,0,0); + int ilom = i - flag(i,j,k-1).isConnected(-1,0,0); + Real whi = mlebtensor_weight(ihip-ihim); + Real wlo = mlebtensor_weight(ilop-ilom); + Real dudx = mlebtensor_dx_on_zface(i,j,k,0,vel,dxi, + bvzlo,bvzhi,bct,dlo,dhi, + whi,wlo,ihip,ihim,ilop,ilom); + Real dwdx = mlebtensor_dx_on_zface(i,j,k,2,vel,dxi, + bvzlo,bvzhi,bct,dlo,dhi, + whi,wlo,ihip,ihim,ilop,ilom); int jhip = j + flag(i,j,k ).isConnected(0, 1,0); int jhim = j - flag(i,j,k ).isConnected(0,-1,0); int jlop = j + flag(i,j,k-1).isConnected(0, 1,0); int jlom = j - flag(i,j,k-1).isConnected(0,-1,0); whi = mlebtensor_weight(jhip-jhim); wlo = mlebtensor_weight(jlop-jlom); - Real dvdy = (0.5*dyi) * ((vel(i,jhip,k ,1)-vel(i,jhim,k ,1))*whi - +(vel(i,jlop,k-1,1)-vel(i,jlom,k-1,1))*wlo); - Real dwdy = (0.5*dyi) * ((vel(i,jhip,k ,2)-vel(i,jhim,k ,2))*whi - +(vel(i,jlop,k-1,2)-vel(i,jlom,k-1,2))*wlo); - + Real dvdy = mlebtensor_dy_on_zface(i,j,k,1,vel,dyi, + bvzlo,bvzhi,bct,dlo,dhi, + whi,wlo,jhip,jhim,jlop,jlom); + Real dwdy = mlebtensor_dy_on_zface(i,j,k,2,vel,dyi, + bvzlo,bvzhi,bct,dlo,dhi, + whi,wlo,jhip,jhim,jlop,jlom); Real divu = dudx + dvdy; Real xif = kapz(i,j,k); - Real mun = 0.75*(etaz(i,j,k,2)-xif); // restore the original eta - Real mut = etaz(i,j,k,0); + Real mun = Real(0.75)*(etaz(i,j,k,2)-xif);// restore the original eta + Real mut = etaz(i,j,k,0); fz(i,j,k,0) = -mut*dwdx; fz(i,j,k,1) = -mut*dwdy; diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBTensor_K.H b/Src/LinearSolvers/MLMG/AMReX_MLEBTensor_K.H index c814b3b8e41..8abdde8a7c0 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBTensor_K.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBTensor_K.H @@ -4,6 +4,145 @@ #include +namespace amrex { + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mlebtensor_weight (int d) { + return (d==2) ? 0.5 : ((d==1) ? 1.0 : 0.0); +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mlebtensor_dy_on_xface (int i, int, int k, int n, + Array4 const& vel, Real dyi, + Real whi, Real wlo, + int jhip, int jhim, int jlop, int jlom) noexcept +{ + return Real(0.5)*dyi * ((vel(i ,jhip,k,n)-vel(i ,jhim,k,n))*whi + + (vel(i-1,jlop,k,n)-vel(i-1,jlom,k,n))*wlo); +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mlebtensor_dx_on_yface (int, int j, int k, int n, + Array4 const& vel, Real dxi, + Real whi, Real wlo, + int ihip, int ihim, int ilop, int ilom) noexcept +{ + return Real(0.5)*dxi * ((vel(ihip,j ,k,n)-vel(ihim,j ,k,n))*whi + + (vel(ilop,j-1,k,n)-vel(ilom,j-1,k,n))*wlo); +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mlebtensor_dy_on_xface (int i, int j, int k, int n, + Array4 const& vel, Real dyi, + Array4 const& bvxlo, + Array4 const& bvxhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi, + Real whi, Real wlo, + int jhip, int jhim, int jlop, int jlom) noexcept +{ + Real ddy; + if (i == dlo.x) { + if (bct(Orientation::xlo(),n) == AMREX_LO_DIRICHLET && bvxlo) { + if (j == dlo.y) { + ddy = (bvxlo(i-1,j ,k,n) * Real(-1.5) + + bvxlo(i-1,j+1,k,n) * Real(2.) + + bvxlo(i-1,j+2,k,n) * Real(-0.5)) * dyi; + } else if (j == dhi.y) { + ddy = -(bvxlo(i-1,j ,k,n) * Real(-1.5) + + bvxlo(i-1,j-1,k,n) * Real(2.) 
+ + bvxlo(i-1,j-2,k,n) * Real(-0.5)) * dyi; + } else { + ddy = whi*dyi*(bvxlo(i-1,jhip,k,n)-bvxlo(i-1,jhim,k,n)); + } + } else if (bct(Orientation::xlo(),n) == AMREX_LO_NEUMANN) { + ddy = whi*dyi*(vel(i,jhip,k,n)-vel(i,jhim,k,n)); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddy = Real(0.); + } + } else if (i == dhi.x+1) { + if (bct(Orientation::xhi(),n) == AMREX_LO_DIRICHLET && bvxhi) { + if (j == dlo.y) { + ddy = (bvxhi(i,j ,k,n) * Real(-1.5) + + bvxhi(i,j+1,k,n) * Real(2.) + + bvxhi(i,j+2,k,n) * Real(-0.5)) * dyi; + } else if (j == dhi.y) { + ddy = -(bvxhi(i,j ,k,n) * Real(-1.5) + + bvxhi(i,j-1,k,n) * Real(2.) + + bvxhi(i,j-2,k,n) * Real(-0.5)) * dyi; + } else { + ddy = wlo*dyi*(bvxhi(i,jlop,k,n)-bvxhi(i,jlom,k,n)); + } + } else if (bct(Orientation::xhi(),n) == AMREX_LO_NEUMANN) { + ddy = wlo*dyi*(vel(i-1,jlop,k,n)-vel(i-1,jlom,k,n)); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddy = Real(0.); + } + } else { + ddy = mlebtensor_dy_on_xface(i,j,k,n,vel,dyi,whi,wlo,jhip,jhim,jlop,jlom); + } + return ddy; +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mlebtensor_dx_on_yface (int i, int j, int k, int n, + Array4 const& vel, Real dxi, + Array4 const& bvylo, + Array4 const& bvyhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi, + Real whi, Real wlo, + int ihip, int ihim, int ilop, int ilom) noexcept +{ + Real ddx; + if (j == dlo.y) { + if (bct(Orientation::ylo(),n) == AMREX_LO_DIRICHLET && bvylo) { + if (i == dlo.x) { + ddx = (bvylo(i ,j-1,k,n) * Real(-1.5) + + bvylo(i+1,j-1,k,n) * Real(2.) + + bvylo(i+2,j-1,k,n) * Real(-0.5)) * dxi; + } else if (i == dhi.x) { + ddx = -(bvylo(i ,j-1,k,n) * Real(-1.5) + + bvylo(i-1,j-1,k,n) * Real(2.) + + bvylo(i-2,j-1,k,n) * Real(-0.5)) * dxi; + } else { + ddx = whi*dxi*(bvylo(ihip,j-1,k,n)-bvylo(ihim,j-1,k,n)); + } + } else if (bct(Orientation::ylo(),n) == AMREX_LO_NEUMANN) { + ddx = whi*dxi*(vel(ihip,j,k,n)-vel(ihim,j,k,n)); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddx = Real(0.); + } + } else if (j == dhi.y+1) { + if (bct(Orientation::yhi(),n) == AMREX_LO_DIRICHLET && bvyhi) { + if (i == dlo.x) { + ddx = (bvyhi(i ,j,k,n) * Real(-1.5) + + bvyhi(i+1,j,k,n) * Real(2.) + + bvyhi(i+2,j,k,n) * Real(-0.5)) * dxi; + } else if (i == dhi.x) { + ddx = -(bvyhi(i ,j,k,n) * Real(-1.5) + + bvyhi(i-1,j,k,n) * Real(2.) + + bvyhi(i-2,j,k,n) * Real(-0.5)) * dxi; + } else { + ddx = wlo*dxi*(bvyhi(ilop,j,k,n)-bvyhi(ilom,j,k,n)); + } + } else if (bct(Orientation::yhi(),n) == AMREX_LO_NEUMANN) { + ddx = wlo*dxi*(vel(ilop,j-1,k,n)-vel(ilom,j-1,k,n)); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddx = Real(0.); + } + } else { + ddx = mlebtensor_dx_on_yface(i,j,k,n,vel,dxi,whi,wlo,ihip,ihim,ilop,ilom); + } + return ddx; +} + +} + #if (AMREX_SPACEDIM == 1) #elif (AMREX_SPACEDIM == 2) #include diff --git a/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H b/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H index f744c96e059..09d835d8b86 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H @@ -2,6 +2,7 @@ #define AMREX_ML_LINOP_H_ #include +#include #include #include #include @@ -177,10 +178,10 @@ public: * inhomogeneous Neumann BC, the value in leveldata is assumed to be * `d./dx`. 
*/ - virtual void setLevelBC (int amrlev, const MultiFab* levelbcdata, - const MultiFab* robinbc_a = nullptr, - const MultiFab* robinbc_b = nullptr, - const MultiFab* robinbc_f = nullptr) = 0; + virtual void setLevelBC (int /*amrlev*/, const MultiFab* /*levelbcdata*/, + const MultiFab* /*robinbc_a*/ = nullptr, + const MultiFab* /*robinbc_b*/ = nullptr, + const MultiFab* /*robinbc_f*/ = nullptr) {} void setVerbose (int v) noexcept { verbose = v; } @@ -197,52 +198,51 @@ public: virtual bool needsUpdate () const { return false; } virtual void update () {} - virtual void restriction (int amrlev, int cmglev, MultiFab& crse, MultiFab& fine) const = 0; - virtual void interpolation (int amrlev, int fmglev, MultiFab& fine, const MultiFab& crse) const = 0; - virtual void averageDownSolutionRHS (int camrlev, MultiFab& crse_sol, MultiFab& crse_rhs, - const MultiFab& fine_sol, const MultiFab& fine_rhs) = 0; + virtual void restriction (int /*amrlev*/, int /*cmglev*/, MultiFab& /*crse*/, MultiFab& /*fine*/) const {} + virtual void interpolation (int /*amrlev*/, int /*fmglev*/, MultiFab& /*fine*/, const MultiFab& /*crse*/) const {} + virtual void interpAssign (int /*amrlev*/, int /*fmglev*/, MultiFab& /*fine*/, MultiFab& /*crse*/) const {} + virtual void averageDownSolutionRHS (int /*camrlev*/, MultiFab& /*crse_sol*/, MultiFab& /*crse_rhs*/, + const MultiFab& /*fine_sol*/, const MultiFab& /*fine_rhs*/) {} - virtual void apply (int amrlev, int mglev, MultiFab& out, MultiFab& in, BCMode bc_mode, - StateMode s_mode, const MLMGBndry* bndry=nullptr) const = 0; - virtual void smooth (int amrlev, int mglev, MultiFab& sol, const MultiFab& rhs, - bool skip_fillboundary=false) const = 0; + virtual void apply (int /*amrlev*/, int /*mglev*/, MultiFab& /*out*/, MultiFab& /*in*/, BCMode /*bc_mode*/, + StateMode /*s_mode*/, const MLMGBndry* /*bndry*/=nullptr) const {} + virtual void smooth (int /*amrlev*/, int /*mglev*/, MultiFab& /*sol*/, const MultiFab& /*rhs*/, + bool /*skip_fillboundary*/=false) const {} // Divide mf by the diagonal component of the operator. Used by bicgstab. 
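The hunk above relaxes the MLLinOp contract: most of the MultiFab-based virtuals stop being pure and gain no-op default bodies, so an operator whose unknowns do not live in a single MultiFab no longer has to stub out methods it never uses. A minimal sketch of a derived operator under that relaxed contract — the class name is hypothetical, and the remaining pure virtuals (AnyNormInfMask, AnyInterpolationAmr, AnyAvgDownResAmr, AnyAverageDownAndSync) are elided:

#include <AMReX_MLLinOp.H>

// Sketch only. prepareForSolve() stays pure virtual; everything else
// shown here is now optional to override.
class MyOp : public amrex::MLLinOp
{
public:
    void prepareForSolve () override { /* cache coefficients, masks, ... */ }

    // A MultiFab-based operator may still supply the classic interface:
    void smooth (int amrlev, int mglev, amrex::MultiFab& sol,
                 const amrex::MultiFab& rhs,
                 bool skip_fillboundary = false) const override
    {
        amrex::ignore_unused(amrlev, mglev, sol, rhs, skip_fillboundary);
        // one relaxation sweep would go here
    }
};

The normalize declaration that follows already had a default body before this patch and is unchanged.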
virtual void normalize (int /*amrlev*/, int /*mglev*/, MultiFab& /*mf*/) const {} - virtual void solutionResidual (int amrlev, MultiFab& resid, MultiFab& x, const MultiFab& b, - const MultiFab* crse_bcdata=nullptr) = 0; - virtual void correctionResidual (int amrlev, int mglev, MultiFab& resid, MultiFab& x, const MultiFab& b, - BCMode bc_mode, const MultiFab* crse_bcdata=nullptr) = 0; - - virtual void reflux (int crse_amrlev, - MultiFab& res, const MultiFab& crse_sol, const MultiFab& crse_rhs, - MultiFab& fine_res, MultiFab& fine_sol, const MultiFab& fine_rhs) const = 0; - virtual void compFlux (int amrlev, const Array& fluxes, - MultiFab& sol, Location loc) const = 0; - virtual void compGrad (int amrlev, const Array& grad, - MultiFab& sol, Location loc) const = 0; - - virtual void applyMetricTerm (int amrlev, int mglev, MultiFab& rhs) const = 0; - virtual void unapplyMetricTerm (int amrlev, int mglev, MultiFab& rhs) const = 0; - virtual void fillSolutionBC (int amrlev, MultiFab& sol, const MultiFab* crse_bcdata=nullptr) = 0; - - virtual void unimposeNeumannBC (int /*amrlev*/, MultiFab& /*rhs*/) const {} // only nodal solver might need it - virtual void applyInhomogNeumannTerm (int /*amrlev*/, MultiFab& /*rhs*/) const {} - virtual void applyOverset (int /*amlev*/, MultiFab& /*rhs*/) const {} - virtual void scaleRHS (int /*amrlev*/, MultiFab& /*rhs*/) const {} - virtual Real getSolvabilityOffset (int /*amrlev*/, int /*mglev*/, MultiFab const& /*rhs*/) const { return 0._rt; } // Only nodal solvers need it - virtual void fixSolvabilityByOffset (int /*amrlev*/, int /*mglev*/, MultiFab& /*rhs*/, Real /*offset*/) const {} // Only nodal solvers need it + virtual void solutionResidual (int /*amrlev*/, MultiFab& /*resid*/, MultiFab& /*x*/, const MultiFab& /*b*/, + const MultiFab* /*crse_bcdata*/=nullptr) {} + virtual void correctionResidual (int /*amrlev*/, int /*mglev*/, MultiFab& /*resid*/, MultiFab& /*x*/, const MultiFab& /*b*/, + BCMode /*bc_mode*/, const MultiFab* /*crse_bcdata*/=nullptr) {} + + virtual void reflux (int /*crse_amrlev*/, + MultiFab& /*res*/, const MultiFab& /*crse_sol*/, const MultiFab& /*crse_rhs*/, + MultiFab& /*fine_res*/, MultiFab& /*fine_sol*/, const MultiFab& /*fine_rhs*/) const {} + virtual void compFlux (int /*amrlev*/, const Array& /*fluxes*/, + MultiFab& /*sol*/, Location /*loc*/) const {} + virtual void compGrad (int /*amrlev*/, const Array& /*grad*/, + MultiFab& /*sol*/, Location /*loc*/) const {} + + virtual void applyMetricTerm (int /*amrlev*/, int /*mglev*/, Any& /*rhs*/) const {} + virtual void unapplyMetricTerm (int /*amrlev*/, int /*mglev*/, MultiFab& /*rhs*/) const {} + + virtual void unimposeNeumannBC (int /*amrlev*/, Any& /*rhs*/) const {} // only nodal solver might need it + virtual void applyInhomogNeumannTerm (int /*amrlev*/, Any& /*rhs*/) const {} + virtual void applyOverset (int /*amlev*/, Any& /*rhs*/) const {} + virtual void scaleRHS (int /*amrlev*/, Any& /*rhs*/) const {} + virtual Vector getSolvabilityOffset (int /*amrlev*/, int /*mglev*/, + Any const& /*rhs*/) const { return {}; } + virtual void fixSolvabilityByOffset (int /*amrlev*/, int /*mglev*/, Any& /*rhs*/, + Vector const& /*offset*/) const {} virtual void prepareForSolve () = 0; - virtual bool isSingular (int amrlev) const = 0; - virtual bool isBottomSingular () const = 0; - virtual Real xdoty (int amrlev, int mglev, const MultiFab& x, const MultiFab& y, bool local) const = 0; + virtual bool isSingular (int /*amrlev*/) const { return false; } + virtual bool isBottomSingular () const { return 
false; } + virtual Real xdoty (int /*amrlev*/, int /*mglev*/, const MultiFab& /*x*/, const MultiFab& /*y*/, bool /*local*/) const { return 0._rt; } - virtual void fixUpResidualMask (int /*amrlev*/, iMultiFab& /*resmsk*/) { } - virtual void nodalSync (int /*amrlev*/, int /*mglev*/, MultiFab& /*mf*/) const {} - - virtual std::unique_ptr makeNLinOp (int grid_size) const = 0; + virtual std::unique_ptr makeNLinOp (int /*grid_size*/) const { return {nullptr}; } virtual void getFluxes (const Vector >& /*a_flux*/, const Vector& /*a_sol*/, @@ -283,6 +283,59 @@ public: virtual void copyNSolveSolution (MultiFab&, MultiFab const&) const {} + virtual Any AnyMake (int amrlev, int mglev, IntVect const& ng) const; + virtual Any AnyMakeCoarseMG (int amrlev, int mglev, IntVect const& ng) const; + virtual Any AnyMakeCoarseAmr (int famrlev, IntVect const& ng) const; + virtual Any AnyMakeAlias (Any const& a) const; + virtual IntVect AnyGrowVect (Any const& a) const; + virtual void AnyCopy (Any& dst, Any const& src, IntVect const& ng) const; + virtual void AnyAdd (Any& dst, Any const& src, IntVect const& ng) const; + virtual void AnySetToZero (Any& a) const; + virtual void AnySetBndryToZero (Any& a) const; +#ifdef AMREX_USE_EB + virtual void AnySetCoveredToZero (Any& a) const; +#endif + virtual void AnyParallelCopy (Any& dst, Any const& src, + IntVect const& src_nghost, IntVect const& dst_nghost, + Periodicity const& period = Periodicity::NonPeriodic()) const; + + virtual Real AnyNormInf (Any& a) const; + + virtual Real AnyNormInfMask (int amrlev, Any const& a, bool local) const = 0; + + virtual void AnySolutionResidual (int amrlev, Any& resid, Any& x, Any const& b, + Any const* crse_bcdata = nullptr); + virtual void AnyCorrectionResidual (int amrlev, int mglev, Any& resid, Any& x, + const Any& b, BCMode bc_mode, + const Any* crse_bcdata=nullptr); + virtual void AnyReflux (int crse_amrlev, + Any& res, const Any& crse_sol, const Any& crse_rhs, + Any& fine_res, Any& fine_sol, const Any& fine_rhs); + + virtual void AnyAvgDownResAmr (int clev, Any& cres, Any const& fres) const = 0; + virtual void AnyAvgDownResMG (int clev, Any& cres, Any const& fres) const; + + virtual void AnySmooth (int amrlev, int mglev, Any& sol, const Any& rhs, + bool skip_fillboundary=false) const; + + virtual void AnyRestriction (int amrlev, int cmglev, Any& crse, Any& fine) const; + + virtual void AnyInterpolationMG (int amrlev, int fmglev, Any& fine, const Any& crse) const; + virtual void AnyInterpAssignMG (int amrlev, int fmglev, Any& fine, Any& crse) const; + virtual void AnyInterpolationAmr (int famrlev, Any& fine, const Any& crse, + IntVect const& /*nghost*/) const = 0; + + virtual void AnyAverageDownSolutionRHS (int camrlev, Any& crse_sol, Any& crse_rhs, + const Any& fine_sol, const Any& fine_rhs); + + virtual void AnyAverageDownAndSync (Vector& sol) const = 0; + + virtual void postSolve (Vector& sol) const; + + Real MFNormInf (MultiFab const& mf, iMultiFab const* fine_mask, bool local) const; + + bool isMFIterSafe (int amrlev, int mglev1, int mglev2) const; + protected: static constexpr int mg_coarsen_ratio = 2; @@ -401,7 +454,7 @@ protected: bool isCellCentered () const noexcept { return m_ixtype == 0; } - virtual void make (Vector >& mf, int nc, IntVect const& ng) const; + void make (Vector >& mf, IntVect const& ng) const; virtual std::unique_ptr > makeFactory (int /*amrlev*/, int /*mglev*/) const { return std::make_unique(); diff --git a/Src/LinearSolvers/MLMG/AMReX_MLLinOp.cpp b/Src/LinearSolvers/MLMG/AMReX_MLLinOp.cpp index 
9c6ccc8ce05..e53ed376d97 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLLinOp.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLLinOp.cpp @@ -4,10 +4,12 @@ #include #include #include +#include #ifdef AMREX_USE_EB #include #include +#include #endif #ifdef AMREX_USE_PETSC @@ -544,7 +546,7 @@ MLLinOp::defineBC () } void -MLLinOp::make (Vector >& mf, int nc, IntVect const& ng) const +MLLinOp::make (Vector >& mf, IntVect const& ng) const { mf.clear(); mf.resize(m_num_amr_levels); @@ -553,8 +555,7 @@ MLLinOp::make (Vector >& mf, int nc, IntVect const& ng) const mf[alev].resize(m_num_mg_levels[alev]); for (int mlev = 0; mlev < m_num_mg_levels[alev]; ++mlev) { - const auto& ba = amrex::convert(m_grids[alev][mlev], m_ixtype); - mf[alev][mlev].define(ba, m_dmap[alev][mlev], nc, ng, MFInfo(), *m_factory[alev][mlev]); + mf[alev][mlev] = AnyMake(alev, mlev, ng); } } } @@ -895,6 +896,279 @@ MLLinOp::resizeMultiGrid (int new_size) } } +Any +MLLinOp::AnyMake (int amrlev, int mglev, IntVect const& ng) const +{ + return Any(MultiFab(amrex::convert(m_grids[amrlev][mglev], m_ixtype), + m_dmap[amrlev][mglev], getNComp(), ng, MFInfo(), + *m_factory[amrlev][mglev])); +} + +Any +MLLinOp::AnyMakeCoarseMG (int amrlev, int mglev, IntVect const& ng) const +{ + BoxArray cba = m_grids[amrlev][mglev]; + IntVect ratio = (amrlev > 0) ? IntVect(2) : mg_coarsen_ratio_vec[mglev]; + cba.coarsen(ratio); + cba.convert(m_ixtype); + return Any(MultiFab(cba, m_dmap[amrlev][mglev], getNComp(), ng)); +} + +Any +MLLinOp::AnyMakeCoarseAmr (int famrlev, IntVect const& ng) const +{ + BoxArray cba = m_grids[famrlev][0]; + IntVect ratio(AMRRefRatio(famrlev-1)); + cba.coarsen(ratio); + cba.convert(m_ixtype); + return Any(MultiFab(cba, m_dmap[famrlev][0], getNComp(), ng)); +} + +Any +MLLinOp::AnyMakeAlias (Any const& a) const +{ + AMREX_ASSERT(a.is()); + MultiFab const& mf = a.get(); + return Any(MultiFab(mf, amrex::make_alias, 0, mf.nComp())); +} + +IntVect +MLLinOp::AnyGrowVect (Any const& a) const +{ + AMREX_ASSERT(a.is()); + MultiFab const& mf = a.get(); + return mf.nGrowVect(); +} + +void +MLLinOp::AnySetToZero (Any& a) const +{ + AMREX_ASSERT(a.is()); + MultiFab& mf = a.get(); + mf.setVal(0._rt); +} + +void +MLLinOp::AnySetBndryToZero (Any& a) const +{ + AMREX_ASSERT(a.is()); + MultiFab& mf = a.get(); + mf.setBndry(0._rt, 0, getNComp()); +} + +#ifdef AMREX_USE_EB +void +MLLinOp::AnySetCoveredToZero (Any& a) const +{ + AMREX_ASSERT(a.is()); + auto& mf = a.get(); + EB_set_covered(mf, 0, getNComp(), 0, 0._rt); +} +#endif + +void +MLLinOp::AnyCopy (Any& dst, Any const& src, IntVect const& ng) const +{ + AMREX_ASSERT(dst.is() && src.is()); + MultiFab& dmf = dst.get(); + MultiFab const& smf = src.get(); + MultiFab::Copy(dmf, smf, 0, 0, getNComp(), ng); +} + +void +MLLinOp::AnyAdd (Any& dst, Any const& src, IntVect const& ng) const +{ + AMREX_ASSERT(dst.is() && src.is()); + MultiFab& dmf = dst.get(); + MultiFab const& smf = src.get(); + MultiFab::Add(dmf, smf, 0, 0, getNComp(), ng); +} + +void +MLLinOp::AnyAverageDownSolutionRHS (int camrlev, Any& a_crse_sol, Any& a_crse_rhs, + const Any& a_fine_sol, const Any& a_fine_rhs) +{ + AMREX_ASSERT(a_crse_sol.is() && + a_crse_rhs.is() && + a_fine_sol.is() && + a_fine_rhs.is()); + auto& crse_sol = a_crse_sol.get(); + auto& crse_rhs = a_crse_rhs.get(); + auto& fine_sol = a_fine_sol.get(); + auto& fine_rhs = a_fine_rhs.get(); + averageDownSolutionRHS(camrlev, crse_sol, crse_rhs, fine_sol, fine_rhs); +} + +void +MLLinOp::AnyParallelCopy (Any& dst, Any const& src, + IntVect const& src_nghost, IntVect const& 
dst_nghost, + Periodicity const& period) const +{ + AMREX_ASSERT(dst.is()); + MultiFab& dmf = dst.get(); + MultiFab const& smf = src.get(); + dmf.ParallelCopy(smf, 0, 0, getNComp(), src_nghost, dst_nghost, period); +} + +Real +MLLinOp::AnyNormInf (Any& a) const +{ + AMREX_ASSERT(a.is()); + return a.get().norminf(); +} + +void +MLLinOp::AnySolutionResidual (int amrlev, Any& resid, Any& x, Any const& b, + Any const* crse_bcdata) +{ + AMREX_ASSERT(x.is()); + solutionResidual(amrlev, resid.get(), x.get(), b.get(), + (crse_bcdata) ? &(crse_bcdata->get()) : nullptr); +} + +void +MLLinOp::AnyCorrectionResidual (int amrlev, int mglev, Any& resid, Any& x, const Any& b, + BCMode bc_mode, const Any* crse_bcdata) +{ + AMREX_ASSERT(x.is()); + correctionResidual(amrlev, mglev, resid.get(), x.get(), + b.get(), bc_mode, + (crse_bcdata) ? &(crse_bcdata->get()) : nullptr); +} + +void +MLLinOp::AnyReflux (int clev, Any& res, const Any& crse_sol, const Any& crse_rhs, + Any& fine_res, Any& fine_sol, const Any& fine_rhs) +{ + AMREX_ASSERT(res.is()); + reflux(clev,res.get(), crse_sol.get(), crse_rhs.get(), + fine_res.get(), fine_sol.get(), fine_rhs.get()); +} + +Real +MLLinOp::MFNormInf (MultiFab const& mf, iMultiFab const* fine_mask, bool local) const +{ + const int ncomp = getNComp(); + Real norm = 0._rt; + + if (fine_mask == nullptr) { +#ifdef AMREX_USE_GPU + if (Gpu::inLaunchRegion()) { + auto const& ma = mf.const_arrays(); + norm = ParReduce(TypeList{}, TypeList{}, + mf, IntVect(0), ncomp, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) + -> GpuTuple + { + return amrex::Math::abs(ma[box_no](i,j,k,n)); + }); + } else +#endif + { +#ifdef AMREX_USE_OMP +#pragma omp parallel reduction(max:norm) +#endif + for (MFIter mfi(mf,true); mfi.isValid(); ++mfi) { + Box const& bx = mfi.tilebox(); + auto const& fab = mf.const_array(mfi); + AMREX_LOOP_4D(bx, ncomp, i, j, k, n, + { + norm = std::max(norm, amrex::Math::abs(fab(i,j,k,n))); + }); + } + } + } else { +#ifdef AMREX_USE_GPU + if (Gpu::inLaunchRegion()) { + auto const& ma = mf.const_arrays(); + auto const& mask_ma = fine_mask->const_arrays(); + norm = ParReduce(TypeList{}, TypeList{}, + mf, IntVect(0), ncomp, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) + -> GpuTuple + { + if (mask_ma[box_no](i,j,k)) { + return amrex::Math::abs(ma[box_no](i,j,k,n)); + } else { + return Real(0.0); + } + }); + } else +#endif + { +#ifdef AMREX_USE_OMP +#pragma omp parallel reduction(max:norm) +#endif + for (MFIter mfi(mf,true); mfi.isValid(); ++mfi) { + Box const& bx = mfi.tilebox(); + auto const& fab = mf.const_array(mfi); + auto const& mask = fine_mask->const_array(mfi); + AMREX_LOOP_4D(bx, ncomp, i, j, k, n, + { + if (mask(i,j,k)) { + norm = std::max(norm, amrex::Math::abs(fab(i,j,k,n))); + } + }); + } + } + } + + if (!local) ParallelAllReduce::Max(norm, ParallelContext::CommunicatorSub()); + return norm; +} + +void +MLLinOp::AnyAvgDownResMG (int clev, Any& cres, Any const& fres) const +{ + AMREX_ASSERT(cres.is()); +#ifdef AMREX_USE_EB + amrex::EB_average_down +#else + amrex::average_down +#endif + (fres.get(), cres.get(), 0, getNComp(), + mg_coarsen_ratio_vec[clev-1]); +} + +void +MLLinOp::AnySmooth (int amrlev, int mglev, Any& sol, const Any& rhs, + bool skip_fillboundary) const +{ + AMREX_ASSERT(sol.is() && rhs.is()); + smooth(amrlev, mglev, sol.get(), rhs.get(), skip_fillboundary); +} + +void +MLLinOp::AnyRestriction (int amrlev, int cmglev, Any& crse, Any& fine) const +{ + AMREX_ASSERT(crse.is() && fine.is()); + restriction(amrlev, cmglev, 
crse.get(), fine.get()); +} + +void +MLLinOp::AnyInterpolationMG (int amrlev, int fmglev, Any& fine, const Any& crse) const +{ + AMREX_ASSERT(crse.is() && fine.is()); + interpolation(amrlev, fmglev, fine.get(), crse.get()); +} + +void +MLLinOp::AnyInterpAssignMG (int amrlev, int fmglev, Any& fine, Any& crse) const +{ + AMREX_ASSERT(crse.is() && fine.is()); + interpAssign(amrlev, fmglev, fine.get(), crse.get()); +} + +void +MLLinOp::postSolve (Vector& /* sol */) const {} + +bool +MLLinOp::isMFIterSafe (int amrlev, int mglev1, int mglev2) const +{ + return m_dmap[amrlev][mglev1] == m_dmap[amrlev][mglev2] + && BoxArray::SameRefs(m_grids[amrlev][mglev1], m_grids[amrlev][mglev2]); +} + #ifdef AMREX_USE_PETSC std::unique_ptr MLLinOp::makePETSc () const diff --git a/Src/LinearSolvers/MLMG/AMReX_MLLinOp_temp.H b/Src/LinearSolvers/MLMG/AMReX_MLLinOp_temp.H new file mode 100644 index 00000000000..68d7c836ba5 --- /dev/null +++ b/Src/LinearSolvers/MLMG/AMReX_MLLinOp_temp.H @@ -0,0 +1,486 @@ +#ifndef AMREX_MLLINOP_TEMP_H_ +#define AMREX_MLLINOP_TEMP_H_ + +//! This is a template for writing your own linear operator class for Ax=b. + +#include + +namespace amrex_temp +{ + +class MLLinOpTemp + : public amrex::MLLinOp +{ +public: + + //! In this example, there are 3 edge-based MultiFabs. + using Container = amrex::Array; + + MLLinOpTemp () {} + + virtual ~MLLinOpTemp () {} + + MLLinOpTemp (const MLLinOpTemp&) = delete; + MLLinOpTemp (MLLinOpTemp&&) = delete; + MLLinOpTemp& operator= (const MLLinOpTemp&) = delete; + MLLinOpTemp& operator= (MLLinOpTemp&&) = delete; + + MLLinOpTemp (const amrex::Vector& a_geom, + const amrex::Vector& a_grids, + const amrex::Vector& a_dmap, + const amrex::LPInfo& a_info = amrex::LPInfo(), + const amrex::Vector const*>& a_factory = {}) + { + define(a_geom, a_grids, a_dmap, a_info, a_factory); + } + + void define (const amrex::Vector& a_geom, + const amrex::Vector& a_grids, + const amrex::Vector& a_dmap, + const amrex::LPInfo& a_info = amrex::LPInfo(), + const amrex::Vector const*>& a_factory = {}) + { + amrex::MLLinOp::define(a_geom, a_grids, a_dmap, a_info, a_factory); + } + + /** + * \brief Return the default solver at the bottom of MG cycles. By + * default, MLLinOp uses a BiCGStab solver implemented in + * AMReX::MLCGSolver. However, it only supports a single MultiFab. + * Since our data type is different, we use a smoother instead. In the + * future we can try to generalize MLCGSolver. + */ + virtual amrex::BottomSolver getDefaultBottomSolver () const override { + return amrex::BottomSolver::smoother; + } + + /** + * \brief Make data container (e.g., MultiFabs stored in Any) for the given level. + * + * \param amrlev AMR level. Note that the lowest level is always 0. + * \param mglev MG level. Note that mglev+1 is one level coarser than mglev. + * \param ng number of ghost cells. + */ + virtual amrex::Any AnyMake (int amrlev, int mglev, amrex::IntVect const& ng) const override + { + auto const& ba = m_grids[amrlev][mglev]; + auto const& dm = m_dmap [amrlev][mglev]; + auto const& fc = *m_factory[amrlev][mglev]; + return amrex::Any(Container{amrex::MultiFab(amrex::convert(ba,amrex::IntVect(0,1,1)), + dm, 1, ng, amrex::MFInfo(), fc), + amrex::MultiFab(amrex::convert(ba,amrex::IntVect(1,0,1)), + dm, 1, ng, amrex::MFInfo(), fc), + amrex::MultiFab(amrex::convert(ba,amrex::IntVect(1,1,0)), + dm, 1, ng, amrex::MFInfo(), fc)}); + } + + /** + * \brief Make data container with coarsened BoxArray and + * DistributionMapping of the given MG level. + * + * \param amrlev AMR level.
Note that the lowest level is always 0. + * \param mglev MG level. The coarser level is mglev+1. + * \param ng number of ghost cells. + */ + virtual amrex::Any AnyMakeCoarseMG (int amrlev, int mglev, amrex::IntVect const& ng) const override + { + auto ratio = (amrlev > 0) ? amrex::IntVect(2) : this->mg_coarsen_ratio_vec[mglev]; + auto const& ba = amrex::coarsen(m_grids[amrlev][mglev], ratio); + auto const& dm = m_dmap[amrlev][mglev]; + return amrex::Any(Container{amrex::MultiFab(amrex::convert(ba,amrex::IntVect(0,1,1)), + dm, 1, ng), + amrex::MultiFab(amrex::convert(ba,amrex::IntVect(1,0,1)), + dm, 1, ng), + amrex::MultiFab(amrex::convert(ba,amrex::IntVect(1,1,0)), + dm, 1, ng)}); + } + + /** + * \brief Make data container with coarsened BoxArray and + * DistributionMapping of the given AMR level. + * + * \param famrlev AMR level. The coarser AMR level is famrlev-1. + * \param ng number of ghost cells. + */ + virtual amrex::Any AnyMakeCoarseAmr (int famrlev, amrex::IntVect const& ng) const override + { + amrex::IntVect ratio(this->AMRRefRatio(famrlev-1)); + auto const& ba = amrex::coarsen(m_grids[famrlev][0], ratio); + auto const& dm = m_dmap[famrlev][0]; + return amrex::Any(Container{amrex::MultiFab(amrex::convert(ba,amrex::IntVect(0,1,1)), + dm, 1, ng), + amrex::MultiFab(amrex::convert(ba,amrex::IntVect(1,0,1)), + dm, 1, ng), + amrex::MultiFab(amrex::convert(ba,amrex::IntVect(1,1,0)), + dm, 1, ng)}); + } + + /** + * \brief Make an alias of the given Any without deep copying. + * + * \param a an Any object. + */ + virtual amrex::Any AnyMakeAlias (amrex::Any const& a) const override + { + auto const& rhs = a.get(); + return amrex::Any(Container{amrex::MultiFab(rhs[0], amrex::make_alias, 0, 1), + amrex::MultiFab(rhs[1], amrex::make_alias, 0, 1), + amrex::MultiFab(rhs[2], amrex::make_alias, 0, 1)}); + } + + /** + * \brief Return the number of ghost cells in the given Any. + * + * \param a an Any object. + */ + virtual amrex::IntVect AnyGrowVect (amrex::Any const& a) const override + { + auto const& mfs = a.get(); + return mfs[0].nGrowVect(); + } + + /** + * \brief Copy data from source Any to destination Any. + * + * \param dst destination Any. + * \param src source Any. + * \param ng number of ghost cells included in the operation. + */ + virtual void AnyCopy (amrex::Any& dst, amrex::Any const& src, amrex::IntVect const& ng) const override + { + auto& dmf = dst.get(); + auto const& smf = src.get(); + for (int idim=0; idim < 3; ++idim) { + amrex::MultiFab::Copy(dmf[idim], smf[idim], 0, 0, 1, ng); + } + } + + /** + * \brief Add data from source Any to destination Any. + * + * \param dst destination Any. + * \param src source Any. + * \param ng number of ghost cells included in the operation. + */ + virtual void AnyAdd (amrex::Any& dst, amrex::Any const& src, amrex::IntVect const& ng) const override + { + auto& dmf = dst.get(); + auto const& smf = src.get(); + for (int idim=0; idim < 3; ++idim) { + amrex::MultiFab::Add(dmf[idim], smf[idim], 0, 0, 1, ng); + } + } + + /** + * \brief Set the given Any to zero. + * + * \param a an Any object. + */ + virtual void AnySetToZero (amrex::Any& a) const override + { + auto& mfs = a.get(); + for (int idim=0; idim < 3; ++idim) { + mfs[idim].setVal(amrex::Real(0.0)); + } + } + + /** + * \brief Set boundary (i.e., ghost cells) of the given Any to zero. + * + * \param a an Any object.
+ */ + virtual void AnySetBndryToZero (amrex::Any& a) const override + { + auto& mfs = a.get(); + for (int idim=0; idim < 3; ++idim) { + mfs[idim].setBndry(amrex::Real(0.0), 0, 1); + } + } + +#ifdef AMREX_USE_EB + /** + * \brief Set covered region of the given Any to zero. + * + * \param a an Any object. + */ + virtual void AnySetCoveredToZero (amrex::Any& a) const override + { + auto& mfs = a.get(); + for (int idim=0; idim < 3; ++idim) { + amrex::EB_set_covered(mfs[idim], 0, 1, 0, amrex::Real(0.0)); + } + } +#endif + + /** + * \brief ParallelCopy from source Any to destination Any. + * + * \param dst destination Any. + * \param src source Any. + * \param src_nghost number of ghost cells in the source included in the operation. + * \param dst_nghost number of ghost cells in the destination included in the operation. + * \param period Periodicity. + */ + virtual void AnyParallelCopy (amrex::Any& dst, amrex::Any const& src, + amrex::IntVect const& src_nghost, amrex::IntVect const& dst_nghost, + amrex::Periodicity const& period = amrex::Periodicity::NonPeriodic()) const override + { + auto& dmf = dst.get(); + auto const& smf = src.get(); + for (int idim=0; idim < 3; ++idim) { + dmf[idim].ParallelCopy_nowait(smf[idim], 0, 0, 1, src_nghost, dst_nghost, period); + } + for (int idim=0; idim < 3; ++idim) { + dmf[idim].ParallelCopy_finish(); + } + } + + /** + * \brief Return the infinity norm of the given Any. + * + * \param a an Any object. + */ + virtual amrex::Real AnyNormInf (amrex::Any& a) const override + { + auto& mfs = a.get(); + amrex::Real r = amrex::Real(0.0); + for (int idim=0; idim < 3; ++idim) { + auto tmp = mfs[idim].norminf(0, 0, true); + r = std::max(r, tmp); + } + amrex::ParallelAllReduce::Max(r, amrex::ParallelContext::CommunicatorSub()); + return r; + } + + /** + * \brief Return the infinity norm of the masked region of the given Any. + * + * For a composite solve with multiple AMR levels, the region covered by + * finer AMR levels is not included in the operation. + * + * \param amrlev AMR level. + * \param a an Any object. + * \param local determines if the reduction is local (i.e., no MPI communication) or not. + */ + virtual amrex::Real AnyNormInfMask (int amrlev, amrex::Any const& a, bool local) const override + { + amrex::ignore_unused(amrlev, a, local); + amrex::Abort("TODO: AnyNormInfMask"); + // This is only needed for multi-level composite solve + return amrex::Real(0.0); + } + + /** + * \brief Compute residual of the original form, r = b - Ax. + * + * \param amrlev AMR level + * \param resid residual + * \param x the solution x + * \param b the RHS b + * \param crse_bcdata provides Dirichlet BC at AMR coarse/fine interface. + * It's a nullptr for single level solve. + */ + virtual void AnySolutionResidual (int amrlev, amrex::Any& resid, amrex::Any& x, amrex::Any const& b, + amrex::Any const* crse_bcdata = nullptr) override + { + amrex::ignore_unused(amrlev, resid, x, b, crse_bcdata); + amrex::Abort("TODO: AnySolutionResidual"); + } + + /** + * \brief Compute residual of the residual correction form, r = b - Ax. + * + * \param amrlev AMR level. + * \param resid residual of the residual correction form. + * \param x the correction. + * \param b the RHS for the residual correction form (i.e., the residual of the original form). + * \param bc_mode is either Homogeneous or Inhomogeneous. + * \param crse_bcdata provides inhomogeneous Dirichlet BC at AMR coarse/fine interface. + * It's ignored for homogeneous Dirichlet BC.
+ */ + virtual void AnyCorrectionResidual (int amrlev, int mglev, amrex::Any& resid, amrex::Any& x, + const amrex::Any& b, MLLinOp::BCMode bc_mode, + const amrex::Any* crse_bcdata=nullptr) override + { + amrex::ignore_unused(amrlev, mglev, resid, x, b, bc_mode, crse_bcdata); + amrex::Abort("TODO: AnyCorrectionResidual"); + } + + /** + * \brief Reflux + * + * This modifies the coarse level residual at the coarse/fine interface. + * + * \param crse_amrlev coarse AMR level. + * \param res coarse level residual. + * \param crse_sol coarse level x. + * \param crse_rhs coarse level b. + * \param fine_res fine level residual. This may not be needed depending on the coarse/fine stencil. + * \param fine_sol fine level x. + * \param fine_rhs fine level b. + */ + virtual void AnyReflux (int crse_amrlev, + amrex::Any& res, const amrex::Any& crse_sol, const amrex::Any& crse_rhs, + amrex::Any& fine_res, amrex::Any& fine_sol, const amrex::Any& fine_rhs) override + { + amrex::ignore_unused(crse_amrlev, res, crse_sol, crse_rhs, fine_res, fine_sol, fine_rhs); + amrex::Abort("TODO: AnyReflux"); + // This is only needed for multi-level composite solve + } + + /** + * \brief Average down residual from fine to coarse AMR level. + * + * \param clev coarse AMR level. + * \param cres coarse level residual. + * \param fres fine level residual. + */ + virtual void AnyAvgDownResAmr (int clev, amrex::Any& cres, amrex::Any const& fres) const override + { + amrex::ignore_unused(clev, cres, fres); + amrex::Abort("TODO: AnyAvgDownResAmr"); + // This is only needed for multi-level composite solve. + // And maybe nothing needs to be done here, like in the nodal projection solver. + } + + /** + * \brief Average down residual from fine to coarse MG level. + * + * This is only needed for MG F-cycle, and we don't need to implement this for V-cycle. + * + * \param clev coarse MG level. + * \param cres coarse level residual. + * \param fres fine level residual. + */ + virtual void AnyAvgDownResMG (int clev, amrex::Any& cres, amrex::Any const& fres) const override + { + amrex::ignore_unused(clev, cres, fres); + amrex::Abort("TODO: AnyAvgDownResMG"); // Not needed for V-cycle. + } + + /** + * \brief Smooth the given level. + * + * \param amrlev AMR level. Note that the lowest level is always 0. + * \param mglev MG level. Note that mglev+1 is one level coarser than mglev. + * \param sol x + * \param rhs b + * \param skip_fillboundary a flag for whether we need to fill ghost cells in this function. + */ + virtual void AnySmooth (int amrlev, int mglev, amrex::Any& sol, const amrex::Any& rhs, + bool skip_fillboundary=false) const override + { + amrex::ignore_unused(amrlev, mglev, sol, rhs, skip_fillboundary); + amrex::Abort("TODO: AnySmooth"); + } + + /** + * \brief Restriction from fine to coarse MG level. + * + * \param amrlev AMR level. + * \param cmglev coarse MG level. The fine MG level is cmglev-1. + * \param crse coarse data. + * \param fine fine data. This is not const& because we may need to fill its ghost cells. + */ + virtual void AnyRestriction (int amrlev, int cmglev, amrex::Any& crse, amrex::Any& fine) const override + { + amrex::ignore_unused(amrlev, cmglev, crse, fine); + amrex::Abort("TODO: AnyRestriction"); + } + + /** + * \brief Add interpolated coarse data onto the fine MG level. + * + * Note that it's an ADD operation. + * + * \param amrlev AMR level. + * \param fmglev fine MG level. The coarse MG level is fmglev+1. + * \param fine fine MG level data. + * \param crse coarse MG level data.
+ */ + virtual void AnyInterpolationMG (int amrlev, int fmglev, amrex::Any& fine, const amrex::Any& crse) const override + { + amrex::ignore_unused(amrlev, fmglev, fine, crse); + amrex::Abort("TODO: AnyInterpolationMG"); + } + + /** + * \brief Assign (i.e., copy) interpolated coarse data onto the fine MG level. + * + * Note that it's an ASSIGN operation. This is used in MG F-cycle, and + * does not need to be implemented for V-cycle. + * + * \param amrlev AMR level. + * \param fmglev fine MG level. The coarse MG level is fmglev+1. + * \param fine fine MG level data. + * \param crse coarse MG level data. + */ + virtual void AnyInterpAssignMG (int amrlev, int fmglev, amrex::Any& fine, amrex::Any& crse) const override + { + amrex::ignore_unused(amrlev, fmglev, fine, crse); + amrex::Abort("TODO: AnyInterpAssignMG"); // not needed for V-cycle. + } + + /** + * \brief Interpolate data from coarse to fine AMR level. + * + * \param famrlev fine AMR level. The coarse AMR level is famrlev-1. + * \param fine data on fine AMR level. + * \param crse data on coarse AMR level. + */ + virtual void AnyInterpolationAmr (int famrlev, amrex::Any& fine, const amrex::Any& crse, + amrex::IntVect const& /*nghost*/) const override + { + amrex::ignore_unused(famrlev, fine, crse); + // This is only needed for multi-level composite solve + amrex::Abort("TODO: AnyInterpolationAmr"); + } + + /** + * \brief Average down x and b from fine to coarse AMR level. + * + * This is called before V-cycle to make data on AMR levels consistent. + * + * \param camrlev coarse AMR level. The fine level is camrlev+1. + * \param crse_sol x on coarse level. + * \param crse_rhs b on coarse level. + * \param fine_sol x on fine level. + * \param fine_rhs b on fine level. + */ + virtual void AnyAverageDownSolutionRHS (int camrlev, amrex::Any& crse_sol, amrex::Any& crse_rhs, + const amrex::Any& fine_sol, const amrex::Any& fine_rhs) override + { + amrex::ignore_unused(camrlev, crse_sol, crse_rhs, fine_sol, fine_rhs); + // This is only needed for multi-level composite solve + amrex::Abort("AnyAverageDownSolutionRHS"); + } + + /** + * \brief Average down and synchronize AMR data. + * + * Synchronize the data on each level. That is, the nodal data in the + * same MultiFab needs to be synchronized. This function also needs to + * average down the data from fine to coarse AMR levels. + * + * \param sol data on all AMR levels. + */ + virtual void AnyAverageDownAndSync (amrex::Vector& sol) const override + { + amrex::ignore_unused(sol); + // Even for single level, we should synchronize the data on level 0. + amrex::Abort("TODO: AnyAverageDownAndSync"); + } + + /** + * \brief Prepare the solver for the MG cycle. + */ + virtual void prepareForSolve () override + { + amrex::Abort("TODO: prepareForSolve"); + } +}; + +} + + +#endif diff --git a/Src/LinearSolvers/MLMG/AMReX_MLMG.H b/Src/LinearSolvers/MLMG/AMReX_MLMG.H index 32980d74c45..e884f877fbc 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLMG.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLMG.H @@ -36,6 +36,10 @@ public: Real solve (const Vector& a_sol, const Vector& a_rhs, Real a_tol_rel, Real a_tol_abs, const char* checkpoint_file = nullptr); + // For this version of solve, Any holds MultiFab-like objects.
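The overload declared just below accepts Vector<Any> directly. As a rough caller-side sketch — names such as nlevs, ncomp, mlmg, sol_mf, rhs_mf, reltol, and abstol are assumed to exist in the caller, and the aliasing mirrors how the MultiFab overload forwards in AMReX_MLMG.cpp later in this patch:

// Hypothetical usage sketch: wrap existing MultiFabs as non-owning
// aliases inside Any, then call the Any-based solve.
amrex::Vector<amrex::Any> any_sol(nlevs);
amrex::Vector<amrex::Any> any_rhs(nlevs);
for (int lev = 0; lev < nlevs; ++lev) {
    any_sol[lev] = amrex::MultiFab(*sol_mf[lev], amrex::make_alias, 0, ncomp);
    any_rhs[lev] = amrex::MultiFab(*rhs_mf[lev], amrex::make_alias, 0, ncomp);
}
amrex::Real resid = mlmg.solve(any_sol, any_rhs, reltol, abstol);

An operator with a custom container (like the MLLinOpTemp template above) would instead fill the Anys with whatever its AnyMake produces.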
+ Real solve (Vector& a_sol, const Vector& a_rhs, + Real a_tol_rel, Real a_tol_abs, const char* checkpoint_file = nullptr); + void getGradSolution (const Vector >& a_grad_sol, Location a_loc = Location::FaceCenter); @@ -121,7 +125,7 @@ public: void setHypreStrongThreshold (Real t) noexcept {hypre_strong_threshold = t;} #endif - void prepareForSolve (const Vector& a_sol, const Vector& a_rhs); + void prepareForSolve (Vector& a_sol, const Vector& a_rhs); void prepareForNSolve (); @@ -151,19 +155,16 @@ public: Real MLRhsNormInf (bool local = false); void buildFineMask (); - void averageDownAndSync (); - - void computeVolInv (); void makeSolvable (); - void makeSolvable (int amrlev, int mglev, MultiFab& mf); + void makeSolvable (int amrlev, int mglev, Any& mf); #if defined(AMREX_USE_HYPRE) && (AMREX_SPACEDIM > 1) - void bottomSolveWithHypre (MultiFab& x, const MultiFab& b); + void bottomSolveWithHypre (Any& x, const Any& b); #endif - void bottomSolveWithPETSc (MultiFab& x, const MultiFab& b); + void bottomSolveWithPETSc (Any& x, const Any& b); - int bottomSolveWithCG (MultiFab& x, const MultiFab& b, MLCGSolver::Type type); + int bottomSolveWithCG (Any& x, const Any& b, MLCGSolver::Type type); Real getInitRHS () const noexcept { return m_rhsnorm0; } // Initial composite residual @@ -242,26 +243,21 @@ private: * \brief To avoid confusion, terms like sol, cor, rhs, res, ... etc. are * in the frame of the original equation, not the correction form */ - Vector > sol_raii; - Vector sol; //!< alias to argument a_sol - Vector rhs; //!< Copy of original rhs - //! L(sol) = rhs + Vector sol; //!< Might be alias to argument a_sol + Vector rhs; //!< Copy of original rhs + //! L(sol) = rhs + + Vector sol_is_alias; /** + * \brief First Vector: AMR levels. 0 is the coarsest level * Second Vector: MG levels. 0 is the finest level */ - Vector > res; //! = rhs - L(sol) - Vector > > cor; //!< L(cor) = res - Vector > > cor_hold; - Vector > rescor; //!< = res - L(cor) - //! Residual of the correction form - - Vector > fine_mask; - - Vector > volinv; //!< used by makeSolvable - - Vector > scratch; + Vector > res; //! = rhs - L(sol) + Vector > cor; //!< L(cor) = res + Vector > cor_hold; + Vector > rescor; //!< = res - L(cor) + //!
Residual of the correction form enum timer_types { solve_time=0, iter_time, bottom_time, ntimers }; Vector timer; diff --git a/Src/LinearSolvers/MLMG/AMReX_MLMG.cpp b/Src/LinearSolvers/MLMG/AMReX_MLMG.cpp index 2bdb9222b4b..28c833397b4 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLMG.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLMG.cpp @@ -2,7 +2,6 @@ #include #include #include -#include #include #ifdef AMREX_USE_PETSC @@ -51,25 +50,52 @@ MLMG::~MLMG () Real MLMG::solve (const Vector& a_sol, const Vector& a_rhs, Real a_tol_rel, Real a_tol_abs, const char* checkpoint_file) +{ + Vector any_sol(namrlevs); + Vector any_rhs(namrlevs); + for (int lev = 0; lev < namrlevs; ++lev) { + any_sol[lev] = MultiFab(*a_sol[lev], amrex::make_alias, 0, a_sol[lev]->nComp()); + any_rhs[lev] = MultiFab(*a_rhs[lev], amrex::make_alias, 0, a_rhs[lev]->nComp()); + } + return solve(any_sol, any_rhs, a_tol_rel, a_tol_abs, checkpoint_file); +} + +Real +MLMG::solve (Vector& a_sol, const Vector& a_rhs, + Real a_tol_rel, Real a_tol_abs, const char* checkpoint_file) { BL_PROFILE("MLMG::solve()"); if (checkpoint_file != nullptr) { - checkPoint(a_sol, a_rhs, a_tol_rel, a_tol_abs, checkpoint_file); + if (a_sol[0].is()) { + Vector mf_sol(namrlevs); + Vector mf_rhs(namrlevs); + for (int lev = 0; lev < namrlevs; ++lev) { + mf_sol[lev] = &(a_sol[lev].get()); + mf_rhs[lev] = &(a_rhs[lev].get()); + } + checkPoint(mf_sol, mf_rhs, a_tol_rel, a_tol_abs, checkpoint_file); + } else { + amrex::Abort("MLMG::solve: checkpoint not supported for non-MultiFab type"); + } } if (bottom_solver == BottomSolver::Default) { bottom_solver = linop.getDefaultBottomSolver(); } +#if defined(AMREX_USE_HYPRE) || defined(AMREX_USE_PETSC) if (bottom_solver == BottomSolver::hypre || bottom_solver == BottomSolver::petsc) { + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(a_sol[0].is(), + "Non-MultiFab type not supported for hypre and petsc"); int mo = linop.getMaxOrder(); - if (a_sol[0]->hasEBFabFactory()) { + if (a_sol[0].get().hasEBFabFactory()) { linop.setMaxOrder(2); } else { linop.setMaxOrder(std::min(3,mo)); // maxorder = 4 not supported } } +#endif bool is_nsolve = linop.m_parent; @@ -84,8 +110,6 @@ MLMG::solve (const Vector& a_sol, const Vector& a_rh computeMLResidual(finest_amr_lev); - int ncomp = linop.getNComp(); - bool local = true; Real resnorm0 = MLResNormInf(finest_amr_lev, local); Real rhsnorm0 = MLRhsNormInf(local); @@ -194,15 +218,16 @@ MLMG::solve (const Vector& a_sol, const Vector& a_rh timer[iter_time] = amrex::second() - iter_start_time; } + linop.postSolve(sol); + IntVect ng_back = final_fill_bc ? 
IntVect(1) : IntVect(0); if (linop.hasHiddenDimension()) { ng_back[linop.hiddenDirection()] = 0; } for (int alev = 0; alev < namrlevs; ++alev) { - if (a_sol[alev] != sol[alev]) - { - MultiFab::Copy(*a_sol[alev], *sol[alev], 0, 0, ncomp, ng_back); + if (!sol_is_alias[alev]) { + linop.AnyCopy(a_sol[alev], sol[alev], ng_back); } } @@ -229,16 +254,13 @@ void MLMG::oneIter (int iter) { BL_PROFILE("MLMG::oneIter()"); - int ncomp = linop.getNComp(); - int nghost = 0; - if (cf_strategy == CFStrategy::ghostnodes) nghost = linop.getNGrow(); - for (int alev = finest_amr_lev; alev > 0; --alev) { - if (cf_strategy == CFStrategy::ghostnodes) nghost = linop.getNGrow(alev); miniCycle(alev); - MultiFab::Add(*sol[alev], *cor[alev][0], 0, 0, ncomp, nghost); + IntVect nghost(0); + if (cf_strategy == CFStrategy::ghostnodes) nghost = IntVect(linop.getNGrow(alev)); + linop.AnyAdd(sol[alev], cor[alev][0], nghost); // compute residual for the coarse AMR level computeResWithCrseSolFineCor(alev-1,alev); @@ -250,7 +272,6 @@ void MLMG::oneIter (int iter) // coarsest amr level { - if (cf_strategy == CFStrategy::ghostnodes) nghost = linop.getNGrow(0); // enforce solvability if appropriate if (linop.isSingular(0) && linop.getEnforceSingularSolvable()) { @@ -258,24 +279,27 @@ void MLMG::oneIter (int iter) } if (iter < max_fmg_iters) { - mgFcycle (); + mgFcycle(); } else { - mgVcycle (0, 0); + mgVcycle(0, 0); } - MultiFab::Add(*sol[0], *cor[0][0], 0, 0, ncomp, nghost); + IntVect nghost(0); + if (cf_strategy == CFStrategy::ghostnodes) nghost = IntVect(linop.getNGrow(0)); + linop.AnyAdd(sol[0], cor[0][0], nghost); } for (int alev = 1; alev <= finest_amr_lev; ++alev) { - if (cf_strategy == CFStrategy::ghostnodes) nghost = linop.getNGrow(alev); // (Fine AMR correction) = I(Coarse AMR correction) interpCorrection(alev); - MultiFab::Add(*sol[alev], *cor[alev][0], 0, 0, ncomp, nghost); + IntVect nghost(0); + if (cf_strategy == CFStrategy::ghostnodes) nghost = IntVect(linop.getNGrow(alev)); + linop.AnyAdd(sol[alev], cor[alev][0], nghost); if (alev != finest_amr_lev) { - MultiFab::Add(*cor_hold[alev][0], *cor[alev][0], 0, 0, ncomp, nghost); + linop.AnyAdd(cor_hold[alev][0], cor[alev][0], nghost); } // Update fine AMR level correction @@ -283,14 +307,14 @@ void MLMG::oneIter (int iter) miniCycle(alev); - MultiFab::Add(*sol[alev], *cor[alev][0], 0, 0, ncomp, nghost); + linop.AnyAdd(sol[alev], cor[alev][0], nghost); if (alev != finest_amr_lev) { - MultiFab::Add(*cor[alev][0], *cor_hold[alev][0], 0, 0, ncomp, nghost); + linop.AnyAdd(cor[alev][0], cor_hold[alev][0], nghost); } } - averageDownAndSync(); + linop.AnyAverageDownAndSync(sol); } // Compute multi-level Residual (res) up to amrlevmax. @@ -301,11 +325,11 @@ MLMG::computeMLResidual (int amrlevmax) const int mglev = 0; for (int alev = amrlevmax; alev >= 0; --alev) { - const MultiFab* crse_bcdata = (alev > 0) ? sol[alev-1] : nullptr; - linop.solutionResidual(alev, res[alev][mglev], *sol[alev], rhs[alev], crse_bcdata); + const Any* crse_bcdata = (alev > 0) ? 
&(sol[alev-1]) : nullptr; + linop.AnySolutionResidual(alev, res[alev][mglev], sol[alev], rhs[alev], crse_bcdata); if (alev < finest_amr_lev) { - linop.reflux(alev, res[alev][mglev], *sol[alev], rhs[alev], - res[alev+1][mglev], *sol[alev+1], rhs[alev+1]); + linop.AnyReflux(alev, res[alev][mglev], sol[alev], rhs[alev], + res[alev+1][mglev], sol[alev+1], rhs[alev+1]); } } } @@ -315,16 +339,8 @@ void MLMG::computeResidual (int alev) { BL_PROFILE("MLMG::computeResidual()"); - - MultiFab& x = *sol[alev]; - const MultiFab& b = rhs[alev]; - MultiFab& r = res[alev][0]; - - const MultiFab* crse_bcdata = nullptr; - if (alev > 0) { - crse_bcdata = sol[alev-1]; - } - linop.solutionResidual(alev, r, x, b, crse_bcdata); + const Any* crse_bcdata = (alev > 0) ? &(sol[alev-1]) : nullptr; + linop.AnySolutionResidual(alev, res[alev][0], sol[alev], rhs[alev], crse_bcdata); } // Compute coarse AMR level composite residual with coarse solution and fine correction @@ -333,39 +349,28 @@ MLMG::computeResWithCrseSolFineCor (int calev, int falev) { BL_PROFILE("MLMG::computeResWithCrseSolFineCor()"); - int ncomp = linop.getNComp(); - int nghost = 0; - if (cf_strategy == CFStrategy::ghostnodes) nghost = std::min(linop.getNGrow(falev),linop.getNGrow(calev)); + IntVect nghost(0); + if (cf_strategy == CFStrategy::ghostnodes) nghost = IntVect(std::min(linop.getNGrow(falev),linop.getNGrow(calev))); - MultiFab& crse_sol = *sol[calev]; - const MultiFab& crse_rhs = rhs[calev]; - MultiFab& crse_res = res[calev][0]; + Any& crse_sol = sol[calev]; + const Any& crse_rhs = rhs[calev]; + Any& crse_res = res[calev][0]; - MultiFab& fine_sol = *sol[falev]; - const MultiFab& fine_rhs = rhs[falev]; - MultiFab& fine_cor = *cor[falev][0]; - MultiFab& fine_res = res[falev][0]; - MultiFab& fine_rescor = rescor[falev][0]; + Any& fine_sol = sol[falev]; + const Any& fine_rhs = rhs[falev]; + Any& fine_cor = cor[falev][0]; + Any& fine_res = res[falev][0]; + Any& fine_rescor = rescor[falev][0]; - const MultiFab* crse_bcdata = nullptr; - if (calev > 0) { - crse_bcdata = sol[calev-1]; - } - linop.solutionResidual(calev, crse_res, crse_sol, crse_rhs, crse_bcdata); + const Any* crse_bcdata = (calev > 0) ? &(sol[calev-1]) : nullptr; + linop.AnySolutionResidual(calev, crse_res, crse_sol, crse_rhs, crse_bcdata); - linop.correctionResidual(falev, 0, fine_rescor, fine_cor, fine_res, BCMode::Homogeneous); - MultiFab::Copy(fine_res, fine_rescor, 0, 0, ncomp, nghost); + linop.AnyCorrectionResidual(falev, 0, fine_rescor, fine_cor, fine_res, BCMode::Homogeneous); + linop.AnyCopy(fine_res, fine_rescor, nghost); - linop.reflux(calev, crse_res, crse_sol, crse_rhs, fine_res, fine_sol, fine_rhs); + linop.AnyReflux(calev, crse_res, crse_sol, crse_rhs, fine_res, fine_sol, fine_rhs); - if (linop.isCellCentered()) { - const int amrrr = linop.AMRRefRatio(calev); -#ifdef AMREX_USE_EB - amrex::EB_average_down(fine_res, crse_res, 0, ncomp, amrrr); -#else - amrex::average_down(fine_res, crse_res, 0, ncomp, amrrr); -#endif - } + linop.AnyAvgDownResAmr(calev, crse_res, fine_res); } // Compute fine AMR level residual fine_res = fine_res - L(fine_cor) with coarse providing BC. 
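A short note on the bookkeeping these hunks preserve: the solver tracks two residual forms per level, and this patch changes only their storage type from MultiFab to Any, not the algebra. A compact summary, as pseudocode in comments matching the member docs in AMReX_MLMG.H above:

// res    = rhs - L(sol)   -> AnySolutionResidual, plus AnyReflux at c/f interfaces
// rescor = res - L(cor)   -> AnyCorrectionResidual with homogeneous BC
// After each mini-cycle: sol += cor (AnyAdd), then res is refreshed from
// rescor, which is what computeResWithCrseCorFineCor below does on the
// fine level.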
@@ -374,20 +379,19 @@ MLMG::computeResWithCrseCorFineCor (int falev) { BL_PROFILE("MLMG::computeResWithCrseCorFineCor()"); - int ncomp = linop.getNComp(); - int nghost = 0; - if (cf_strategy == CFStrategy::ghostnodes) nghost = linop.getNGrow(falev); + IntVect nghost(0); + if (cf_strategy == CFStrategy::ghostnodes) nghost = IntVect(linop.getNGrow(falev)); - const MultiFab& crse_cor = *cor[falev-1][0]; + const Any& crse_cor = cor[falev-1][0]; - MultiFab& fine_cor = *cor[falev][0]; - MultiFab& fine_res = res[falev][0]; - MultiFab& fine_rescor = rescor[falev][0]; + Any& fine_cor = cor [falev][0]; + Any& fine_res = res [falev][0]; + Any& fine_rescor = rescor[falev][0]; // fine_rescor = fine_res - L(fine_cor) - linop.correctionResidual(falev, 0, fine_rescor, fine_cor, fine_res, - BCMode::Inhomogeneous, &crse_cor); - MultiFab::Copy(fine_res, fine_rescor, 0, 0, ncomp, nghost); + linop.AnyCorrectionResidual(falev, 0, fine_rescor, fine_cor, fine_res, + BCMode::Inhomogeneous, &crse_cor); + linop.AnyCopy(fine_res, fine_rescor, nghost); } void @@ -413,16 +417,16 @@ MLMG::mgVcycle (int amrlev, int mglev_top) if (verbose >= 4) { - Real norm = res[amrlev][mglev].norm0(); + Real norm = linop.AnyNormInf(res[amrlev][mglev]); amrex::Print() << "AT LEVEL " << amrlev << " " << mglev << " DN: Norm before smooth " << norm << "\n"; } - cor[amrlev][mglev]->setVal(0.0); + linop.AnySetToZero(cor[amrlev][mglev]); bool skip_fillboundary = true; for (int i = 0; i < nu1; ++i) { - linop.smooth(amrlev, mglev, *cor[amrlev][mglev], res[amrlev][mglev], - skip_fillboundary); + linop.AnySmooth(amrlev, mglev, cor[amrlev][mglev], res[amrlev][mglev], + skip_fillboundary); skip_fillboundary = false; } @@ -431,14 +435,13 @@ MLMG::mgVcycle (int amrlev, int mglev_top) if (verbose >= 4) { - Real norm = rescor[amrlev][mglev].norm0(); + Real norm = linop.AnyNormInf(rescor[amrlev][mglev]); amrex::Print() << "AT LEVEL " << amrlev << " " << mglev << " DN: Norm after smooth " << norm << "\n"; } // res_crse = R(rescor_fine); this provides res/b to the level below - linop.restriction(amrlev, mglev+1, res[amrlev][mglev+1], rescor[amrlev][mglev]); - + linop.AnyRestriction(amrlev, mglev+1, res[amrlev][mglev+1], rescor[amrlev][mglev]); } BL_PROFILE_VAR("MLMG::mgVcycle_bottom", blp_bottom); @@ -446,7 +449,7 @@ MLMG::mgVcycle (int amrlev, int mglev_top) { if (verbose >= 4) { - Real norm = res[amrlev][mglev_bottom].norm0(); + Real norm = linop.AnyNormInf(res[amrlev][mglev_bottom]); amrex::Print() << "AT LEVEL " << amrlev << " " << mglev_bottom << " DN: Norm before bottom " << norm << "\n"; } @@ -454,7 +457,7 @@ MLMG::mgVcycle (int amrlev, int mglev_top) if (verbose >= 4) { computeResOfCorrection(amrlev, mglev_bottom); - Real norm = rescor[amrlev][mglev_bottom].norm0(); + Real norm = linop.AnyNormInf(rescor[amrlev][mglev_bottom]); amrex::Print() << "AT LEVEL " << amrlev << " " << mglev_bottom << " UP: Norm after bottom " << norm << "\n"; @@ -464,21 +467,21 @@ MLMG::mgVcycle (int amrlev, int mglev_top) { if (verbose >= 4) { - Real norm = res[amrlev][mglev_bottom].norm0(); + Real norm = linop.AnyNormInf(res[amrlev][mglev_bottom]); amrex::Print() << "AT LEVEL " << amrlev << " " << mglev_bottom << " Norm before smooth " << norm << "\n"; } - cor[amrlev][mglev_bottom]->setVal(0.0); + linop.AnySetToZero(cor[amrlev][mglev_bottom]); bool skip_fillboundary = true; for (int i = 0; i < nu1; ++i) { - linop.smooth(amrlev, mglev_bottom, *cor[amrlev][mglev_bottom], res[amrlev][mglev_bottom], - skip_fillboundary); + linop.AnySmooth(amrlev, mglev_bottom, 
cor[amrlev][mglev_bottom], + res[amrlev][mglev_bottom], skip_fillboundary); skip_fillboundary = false; } if (verbose >= 4) { computeResOfCorrection(amrlev, mglev_bottom); - Real norm = rescor[amrlev][mglev_bottom].norm0(); + Real norm = linop.AnyNormInf(rescor[amrlev][mglev_bottom]); amrex::Print() << "AT LEVEL " << amrlev << " " << mglev_bottom << " Norm after smooth " << norm << "\n"; } @@ -493,12 +496,12 @@ MLMG::mgVcycle (int amrlev, int mglev_top) if (verbose >= 4) { computeResOfCorrection(amrlev, mglev); - Real norm = rescor[amrlev][mglev].norm0(); + Real norm = linop.AnyNormInf(rescor[amrlev][mglev]); amrex::Print() << "AT LEVEL " << amrlev << " " << mglev << " UP: Norm before smooth " << norm << "\n"; } for (int i = 0; i < nu2; ++i) { - linop.smooth(amrlev, mglev, *cor[amrlev][mglev], res[amrlev][mglev]); + linop.AnySmooth(amrlev, mglev, cor[amrlev][mglev], res[amrlev][mglev]); } if (cf_strategy == CFStrategy::ghostnodes) computeResOfCorrection(amrlev, mglev); @@ -506,7 +509,7 @@ MLMG::mgVcycle (int amrlev, int mglev_top) if (verbose >= 4) { computeResOfCorrection(amrlev, mglev); - Real norm = rescor[amrlev][mglev].norm0(); + Real norm = linop.AnyNormInf(rescor[amrlev][mglev]); amrex::Print() << "AT LEVEL " << amrlev << " " << mglev << " UP: Norm after smooth " << norm << "\n"; } @@ -521,21 +524,18 @@ MLMG::mgFcycle () { BL_PROFILE("MLMG::mgFcycle()"); +#ifdef AMREX_USE_EB + AMREX_ASSERT(linop.isCellCentered()); +#endif + const int amrlev = 0; const int mg_bottom_lev = linop.NMGLevels(amrlev) - 1; - const int ncomp = linop.getNComp(); - int nghost = 0; - if (cf_strategy == CFStrategy::ghostnodes) nghost = linop.getNGrow(amrlev); + IntVect nghost(0); + if (cf_strategy == CFStrategy::ghostnodes) nghost = IntVect(linop.getNGrow(amrlev)); for (int mglev = 1; mglev <= mg_bottom_lev; ++mglev) { -#ifdef AMREX_USE_EB - amrex::EB_average_down(res[amrlev][mglev-1], res[amrlev][mglev], 0, ncomp, - linop.mg_coarsen_ratio_vec[mglev-1]); -#else - amrex::average_down(res[amrlev][mglev-1], res[amrlev][mglev], 0, ncomp, - linop.mg_coarsen_ratio_vec[mglev-1]); -#endif + linop.AnyAvgDownResMG(mglev, res[amrlev][mglev], res[amrlev][mglev-1]); } bottomSolve(); @@ -543,17 +543,17 @@ MLMG::mgFcycle () for (int mglev = mg_bottom_lev-1; mglev >= 0; --mglev) { // cor_fine = I(cor_crse) - interpCorrection (amrlev, mglev); + interpCorrection(amrlev, mglev); // rescor = res - L(cor) computeResOfCorrection(amrlev, mglev); // res = rescor; this provides b to the vcycle below - MultiFab::Copy(res[amrlev][mglev], rescor[amrlev][mglev], 0,0,ncomp,nghost); + linop.AnyCopy(res[amrlev][mglev], rescor[amrlev][mglev], nghost); // save cor; do v-cycle; add the saved to cor std::swap(cor[amrlev][mglev], cor_hold[amrlev][mglev]); mgVcycle(amrlev, mglev); - MultiFab::Add(*cor[amrlev][mglev], *cor_hold[amrlev][mglev], 0, 0, ncomp, nghost); + linop.AnyAdd(cor[amrlev][mglev], cor_hold[amrlev][mglev], nghost); } } @@ -563,17 +563,11 @@ MLMG::interpCorrection (int alev) { BL_PROFILE("MLMG::interpCorrection_1"); - const int ncomp = linop.getNComp(); - int nghost = 0; - if (cf_strategy == CFStrategy::ghostnodes) nghost = linop.getNGrow(alev); - - const MultiFab& crse_cor = *cor[alev-1][0]; - MultiFab& fine_cor = *cor[alev][0]; + IntVect nghost(0); + if (cf_strategy == CFStrategy::ghostnodes) nghost = IntVect(linop.getNGrow(alev)); - BoxArray ba = fine_cor.boxArray(); - const int amrrr = linop.AMRRefRatio(alev-1); - IntVect refratio{amrrr}; - ba.coarsen(refratio); + Any const& crse_cor = cor[alev-1][0]; + Any & fine_cor = cor[alev 
][0]; const Geometry& crse_geom = linop.Geom(alev-1,0); @@ -584,121 +578,12 @@ MLMG::interpCorrection (int alev) ng_src = linop.getNGrow(alev-1); ng_dst = linop.getNGrow(alev-1); } - MultiFab cfine(ba, fine_cor.DistributionMap(), ncomp, ng_dst); - cfine.setVal(0.0); - cfine.ParallelCopy(crse_cor, 0, 0, ncomp, ng_src, ng_dst, crse_geom.periodicity()); - - bool isEB = fine_cor.hasEBFabFactory(); - ignore_unused(isEB); -#ifdef AMREX_USE_EB - auto factory = dynamic_cast(&(fine_cor.Factory())); - const FabArray* flags = (factory) ? &(factory->getMultiEBCellFlagFab()) : nullptr; -#endif - - if (linop.isCellCentered()) - { - MFItInfo mfi_info; - if (Gpu::notInLaunchRegion()) mfi_info.EnableTiling().SetDynamic(true); -#ifdef AMREX_USE_OMP -#pragma omp parallel if (Gpu::notInLaunchRegion()) -#endif - for (MFIter mfi(fine_cor, mfi_info); mfi.isValid(); ++mfi) - { - const Box& bx = mfi.tilebox(); - Array4 const& ff = fine_cor.array(mfi); - Array4 const& cc = cfine.const_array(mfi); -#ifdef AMREX_USE_EB - bool call_lincc; - if (isEB) - { - const auto& flag = (*flags)[mfi]; - if (flag.getType(amrex::grow(bx,1)) == FabType::regular) { - call_lincc = true; - } else { - Array4 const& flg = flag.const_array(); - switch(refratio[0]) { - case 2: - { - AMREX_LAUNCH_HOST_DEVICE_LAMBDA (bx, tbx, - { - mlmg_eb_cc_interp_r<2>(tbx, ff, cc, flg, ncomp); - }); - break; - } - case 4: - { - AMREX_LAUNCH_HOST_DEVICE_LAMBDA (bx, tbx, - { - mlmg_eb_cc_interp_r<4>(tbx, ff, cc, flg, ncomp); - }); - break; - } - default: - amrex::Abort("mlmg_eb_cc_interp: only refratio 2 and 4 are supported"); - } + Any cfine = linop.AnyMakeCoarseAmr(alev, IntVect(ng_dst)); + linop.AnySetToZero(cfine); + linop.AnyParallelCopy(cfine, crse_cor, IntVect(ng_src), IntVect(ng_dst), crse_geom.periodicity()); - call_lincc = false; - } - } - else - { - call_lincc = true; - } -#else - const bool call_lincc = true; -#endif - if (call_lincc) - { - switch(refratio[0]) { - case 2: - { - AMREX_LAUNCH_HOST_DEVICE_LAMBDA (bx, tbx, - { - mlmg_lin_cc_interp_r2(tbx, ff, cc, ncomp); - }); - break; - } - case 4: - { - AMREX_LAUNCH_HOST_DEVICE_LAMBDA (bx, tbx, - { - mlmg_lin_cc_interp_r4(tbx, ff, cc, ncomp); - }); - break; - } - default: - amrex::Abort("mlmg_lin_cc_interp: only refratio 2 and 4 are supported"); - } - } - } - } - else - { - AMREX_ALWAYS_ASSERT(amrrr == 2 || amrrr == 4); -#ifdef AMREX_USE_OMP -#pragma omp parallel if (Gpu::notInLaunchRegion()) -#endif - for (MFIter mfi(fine_cor, TilingIfNotGPU()); mfi.isValid(); ++mfi) - { - Box fbx = mfi.tilebox(); - if (cf_strategy == CFStrategy::ghostnodes && nghost >1) fbx.grow(nghost); - Array4 const& ffab = fine_cor.array(mfi); - Array4 const& cfab = cfine.const_array(mfi); - - if (amrrr == 2) { - AMREX_HOST_DEVICE_FOR_4D ( fbx, ncomp, i, j, k, n, - { - mlmg_lin_nd_interp_r2(i,j,k,n,ffab,cfab); - }); - } else { - AMREX_HOST_DEVICE_FOR_4D ( fbx, ncomp, i, j, k, n, - { - mlmg_lin_nd_interp_r4(i,j,k,n,ffab,cfab); - }); - } - } - } + linop.AnyInterpolationAmr(alev, fine_cor, cfine, nghost); } // Interpolate correction between MG levels @@ -709,119 +594,9 @@ MLMG::interpCorrection (int alev, int mglev) { BL_PROFILE("MLMG::interpCorrection_2"); - MultiFab& crse_cor = *cor[alev][mglev+1]; - MultiFab& fine_cor = *cor[alev][mglev ]; - - const int ncomp = linop.getNComp(); - int nghost = 0; - if (cf_strategy == CFStrategy::ghostnodes) nghost = linop.getNGrow(alev); - - const Geometry& crse_geom = linop.Geom(alev,mglev+1); - const IntVect refratio = (alev > 0) ? 
IntVect(2) : linop.mg_coarsen_ratio_vec[mglev]; - - MultiFab cfine; - const MultiFab* cmf; - - if (amrex::isMFIterSafe(crse_cor, fine_cor)) - { - crse_cor.FillBoundary(crse_geom.periodicity()); - cmf = &crse_cor; - } - else - { - BoxArray cba = fine_cor.boxArray(); - cba.coarsen(refratio); - IntVect ng = linop.isCellCentered() ? crse_cor.nGrowVect() : IntVect(0); - if (cf_strategy == CFStrategy::ghostnodes) ng = IntVect(nghost); - cfine.define(cba, fine_cor.DistributionMap(), ncomp, ng); - cfine.setVal(0.0); - cfine.ParallelCopy(crse_cor, 0, 0, ncomp, IntVect(0), ng, crse_geom.periodicity()); - cmf = & cfine; - } - - bool isEB = fine_cor.hasEBFabFactory(); - ignore_unused(isEB); - -#ifdef AMREX_USE_EB - auto factory = dynamic_cast(&(fine_cor.Factory())); - const FabArray* flags = (factory) ? &(factory->getMultiEBCellFlagFab()) : nullptr; -#endif - - if (linop.isCellCentered()) - { - MFItInfo mfi_info; - if (Gpu::notInLaunchRegion()) mfi_info.EnableTiling().SetDynamic(true); -#ifdef AMREX_USE_OMP -#pragma omp parallel if (Gpu::notInLaunchRegion()) -#endif - for (MFIter mfi(fine_cor, mfi_info); mfi.isValid(); ++mfi) - { - const Box& bx = mfi.tilebox(); - const auto& ff = fine_cor.array(mfi); - const auto& cc = cmf->array(mfi); -#ifdef AMREX_USE_EB - bool call_lincc; - if (isEB) - { - const auto& flag = (*flags)[mfi]; - if (flag.getType(amrex::grow(bx,1)) == FabType::regular) { - call_lincc = true; - } else { - Array4 const& flg = flag.const_array(); - AMREX_LAUNCH_HOST_DEVICE_LAMBDA (bx, tbx, - { - mlmg_eb_cc_interp_r<2>(tbx, ff, cc, flg, ncomp); - }); - - call_lincc = false; - } - } - else - { - call_lincc = true; - } -#else - const bool call_lincc = true; -#endif - if (call_lincc) - { -#if (AMREX_SPACEDIM == 3) - if (linop.hasHiddenDimension()) { - Box const& bx_2d = linop.compactify(bx); - auto const& ff_2d = linop.compactify(ff); - auto const& cc_2d = linop.compactify(cc); - AMREX_LAUNCH_HOST_DEVICE_LAMBDA (bx_2d, tbx, - { - TwoD::mlmg_lin_cc_interp_r2(tbx, ff_2d, cc_2d, ncomp); - }); - } else -#endif - { - AMREX_LAUNCH_HOST_DEVICE_LAMBDA (bx, tbx, - { - mlmg_lin_cc_interp_r2(tbx, ff, cc, ncomp); - }); - } - } - } - } - else - { -#ifdef AMREX_USE_OMP -#pragma omp parallel if (Gpu::notInLaunchRegion()) -#endif - for (MFIter mfi(fine_cor, TilingIfNotGPU()); mfi.isValid(); ++mfi) - { - const Box& fbx = mfi.tilebox(); - Array4 const& ffab = fine_cor.array(mfi); - Array4 const& cfab = cmf->const_array(mfi); - - AMREX_HOST_DEVICE_FOR_4D ( fbx, ncomp, i, j, k, n, - { - mlmg_lin_nd_interp_r2(i,j,k,n,ffab,cfab); - }); - } - } + Any& crse_cor = cor[alev][mglev+1]; + Any& fine_cor = cor[alev][mglev ]; + linop.AnyInterpAssignMG(alev, mglev, fine_cor, crse_cor); } // (Fine MG level correction) += I(Coarse MG level correction) @@ -830,31 +605,24 @@ MLMG::addInterpCorrection (int alev, int mglev) { BL_PROFILE("MLMG::addInterpCorrection()"); - const int ncomp = linop.getNComp(); - - const MultiFab& crse_cor = *cor[alev][mglev+1]; - MultiFab& fine_cor = *cor[alev][mglev ]; + const Any& crse_cor = cor[alev][mglev+1]; + Any& fine_cor = cor[alev][mglev ]; - MultiFab cfine; - const MultiFab* cmf; + Any cfine; + const Any* cany; - if (amrex::isMFIterSafe(crse_cor, fine_cor)) + if (linop.isMFIterSafe(alev, mglev, mglev+1)) { - cmf = &crse_cor; + cany = &crse_cor; } else { - BoxArray cba = fine_cor.boxArray(); - IntVect ratio = (alev > 0) ? 
IntVect(2) : linop.mg_coarsen_ratio_vec[mglev]; - - cba.coarsen(ratio); - const int ng = 0; - cfine.define(cba, fine_cor.DistributionMap(), ncomp, ng); - cfine.ParallelCopy(crse_cor); - cmf = &cfine; + cfine = linop.AnyMakeCoarseMG(alev, mglev, IntVect(0)); + linop.AnyParallelCopy(cfine,crse_cor,IntVect(0),IntVect(0)); + cany = &cfine; } - linop.interpolation(alev, mglev, fine_cor, *cmf); + linop.AnyInterpolationMG(alev, mglev, fine_cor, *cany); } // Compute rescor = res - L(cor) @@ -865,10 +633,10 @@ void MLMG::computeResOfCorrection (int amrlev, int mglev) { BL_PROFILE("MLMG:computeResOfCorrection()"); - MultiFab& x = *cor[amrlev][mglev]; - const MultiFab& b = res[amrlev][mglev]; - MultiFab& r = rescor[amrlev][mglev]; - linop.correctionResidual(amrlev, mglev, r, x, b, BCMode::Homogeneous); + Any & x = cor[amrlev][mglev]; + const Any& b = res[amrlev][mglev]; + Any & r = rescor[amrlev][mglev]; + linop.AnyCorrectionResidual(amrlev, mglev, r, x, b, BCMode::Homogeneous); } // At the true bottom of the coarsest AMR level. @@ -894,7 +662,7 @@ MLMG::NSolve (MLMG& a_solver, MultiFab& a_sol, MultiFab& a_rhs) a_sol.setVal(0.0); - MultiFab const& res_bottom = res[0].back(); + MultiFab const& res_bottom = res[0].back().get(); if (BoxArray::SameRefs(a_rhs.boxArray(),res_bottom.boxArray()) && DistributionMapping::SameRefs(a_rhs.DistributionMap(),res_bottom.DistributionMap())) { @@ -906,7 +674,7 @@ MLMG::NSolve (MLMG& a_solver, MultiFab& a_sol, MultiFab& a_rhs) a_solver.solve({&a_sol}, {&a_rhs}, Real(-1.0), Real(-1.0)); - linop.copyNSolveSolution(*cor[0].back(), a_sol); + linop.copyNSolveSolution(cor[0].back().get(), a_sol); } void @@ -914,8 +682,6 @@ MLMG::actualBottomSolve () { BL_PROFILE("MLMG::actualBottomSolve()"); - const int ncomp = linop.getNComp(); - if (!linop.isBottomActive()) return; auto bottom_start_time = amrex::second(); @@ -924,28 +690,28 @@ MLMG::actualBottomSolve () const int amrlev = 0; const int mglev = linop.NMGLevels(amrlev) - 1; - MultiFab& x = *cor[amrlev][mglev]; - MultiFab& b = res[amrlev][mglev]; + auto& x = cor[amrlev][mglev]; + auto& b = res[amrlev][mglev]; - x.setVal(0.0); + linop.AnySetToZero(x); if (bottom_solver == BottomSolver::smoother) { bool skip_fillboundary = true; for (int i = 0; i < nuf; ++i) { - linop.smooth(amrlev, mglev, x, b, skip_fillboundary); + linop.AnySmooth(amrlev, mglev, x, b, skip_fillboundary); skip_fillboundary = false; } } else { - MultiFab* bottom_b = &b; - MultiFab raii_b; + Any* bottom_b = &b; + Any raii_b; if (linop.isBottomSingular() && linop.getEnforceSingularSolvable()) { - raii_b.define(b.boxArray(), b.DistributionMap(), ncomp, b.nGrowVect(), - MFInfo(), *linop.Factory(amrlev,mglev)); - MultiFab::Copy(raii_b,b,0,0,ncomp,b.nGrowVect()); + const IntVect ng = linop.AnyGrowVect(b); + raii_b = linop.AnyMake(amrlev, mglev, ng); + linop.AnyCopy(raii_b, b, ng); bottom_b = &raii_b; makeSolvable(amrlev,mglev,*bottom_b); @@ -973,7 +739,7 @@ MLMG::actualBottomSolve () int ret = bottomSolveWithCG(x, *bottom_b, cg_type); // If the MLMG solve failed then set the correction to zero if (ret != 0) { - cor[amrlev][mglev]->setVal(0.0); + linop.AnySetToZero(cor[amrlev][mglev]); if (bottom_solver == BottomSolver::cgbicg || bottom_solver == BottomSolver::bicgcg) { if (bottom_solver == BottomSolver::cgbicg) { @@ -983,7 +749,7 @@ MLMG::actualBottomSolve () } ret = bottomSolveWithCG(x, *bottom_b, cg_type); if (ret != 0) { - cor[amrlev][mglev]->setVal(0.0); + linop.AnySetToZero(cor[amrlev][mglev]); } else { // switch permanently if (cg_type == MLCGSolver::Type::CG) {
bottom_solver = BottomSolver::cg; @@ -995,7 +761,7 @@ MLMG::actualBottomSolve () } const int n = (ret==0) ? nub : nuf; for (int i = 0; i < n; ++i) { - linop.smooth(amrlev, mglev, x, b); + linop.AnySmooth(amrlev, mglev, x, b); } } } @@ -1006,7 +772,7 @@ MLMG::actualBottomSolve () } int -MLMG::bottomSolveWithCG (MultiFab& x, const MultiFab& b, MLCGSolver::Type type) +MLMG::bottomSolveWithCG (Any& x, const Any& b, MLCGSolver::Type type) { MLCGSolver cg_solver(this, linop); cg_solver.setSolver(type); @@ -1027,37 +793,7 @@ Real MLMG::ResNormInf (int alev, bool local) { BL_PROFILE("MLMG::ResNormInf()"); - const int ncomp = linop.getNComp(); - const int mglev = 0; - Real norm = 0.0; - MultiFab* pmf = &(res[alev][mglev]); -#ifdef AMREX_USE_EB - if (linop.isCellCentered() && scratch[alev]) { - pmf = scratch[alev].get(); - MultiFab::Copy(*pmf, res[alev][mglev], 0, 0, ncomp, 0); - auto factory = dynamic_cast(linop.Factory(alev)); - if (factory) { - const MultiFab& vfrac = factory->getVolFrac(); - for (int n=0; n < ncomp; ++n) { - MultiFab::Multiply(*pmf, vfrac, 0, n, 1, 0); - } - } else { - amrex::Abort("MLMG::ResNormInf: not EB Factory"); - } - } -#endif - for (int n = 0; n < ncomp; n++) - { - Real newnorm = 0.0; - if (fine_mask[alev]) { - newnorm = pmf->norm0(*fine_mask[alev],n,0,true); - } else { - newnorm = pmf->norm0(n,0,true); - } - norm = std::max(norm, newnorm); - } - if (!local) ParallelAllReduce::Max(norm, ParallelContext::CommunicatorSub()); - return norm; + return linop.AnyNormInfMask(alev, res[alev][0], local); } // Computes multi-level masked inf-norm of Residual (res). @@ -1079,66 +815,17 @@ Real MLMG::MLRhsNormInf (bool local) { BL_PROFILE("MLMG::MLRhsNormInf()"); - const int ncomp = linop.getNComp(); - Real r = 0.0; - for (int alev = 0; alev <= finest_amr_lev; ++alev) - { - MultiFab* pmf = &(rhs[alev]); -#ifdef AMREX_USE_EB - if (linop.isCellCentered() && scratch[alev]) { - pmf = scratch[alev].get(); - MultiFab::Copy(*pmf, rhs[alev], 0, 0, ncomp, 0); - auto factory = dynamic_cast(linop.Factory(alev)); - if (factory) { - const MultiFab& vfrac = factory->getVolFrac(); - for (int n=0; n < ncomp; ++n) { - MultiFab::Multiply(*pmf, vfrac, 0, n, 1, 0); - } - } else { - amrex::Abort("MLMG::MLRhsNormInf: not EB Factory"); - } - } -#endif - for (int n=0; nnorm0(*fine_mask[alev],n,0,true)); - } else { - r = std::max(r, pmf->norm0(n,0,true)); - } - } + Real r = 0.0_rt; + for (int alev = 0; alev <= finest_amr_lev; ++alev) { + auto t = linop.AnyNormInfMask(alev, rhs[alev], true); + r = std::max(r, t); } if (!local) ParallelAllReduce::Max(r, ParallelContext::CommunicatorSub()); return r; } void -MLMG::buildFineMask () -{ - BL_PROFILE("MLMG::buildFineMask()"); - - if (!fine_mask.empty()) return; - - fine_mask.clear(); - fine_mask.resize(namrlevs); - - const auto& amrrr = linop.AMRRefRatio(); - for (int alev = 0; alev < finest_amr_lev; ++alev) - { - fine_mask[alev] = std::make_unique - (makeFineMask(rhs[alev], rhs[alev+1], IntVect(0), IntVect(amrrr[alev]), - Periodicity::NonPeriodic(), 1, 0)); - } - - if (!linop.isCellCentered()) { - for (int alev = 0; alev < finest_amr_lev; ++alev) { - linop.fixUpResidualMask(alev, *fine_mask[alev]); - } - } -} - -void -MLMG::prepareForSolve (const Vector& a_sol, const Vector& a_rhs) +MLMG::prepareForSolve (Vector& a_sol, const Vector& a_rhs) { BL_PROFILE("MLMG::prepareForSolve()"); @@ -1147,7 +834,6 @@ MLMG::prepareForSolve (const Vector& a_sol, const Vector& a_sol, const VectornGrowVect() == ng_sol) + else if (linop.AnyGrowVect(a_sol[alev]) == ng_sol) { - 
sol[alev] = a_sol[alev]; - sol[alev]->setBndry(0.0); + sol[alev] = linop.AnyMakeAlias(a_sol[alev]); + linop.AnySetBndryToZero(sol[alev]); + sol_is_alias[alev] = true; } else { if (!solve_called) { - sol_raii[alev] = std::make_unique(a_sol[alev]->boxArray(), - a_sol[alev]->DistributionMap(), - ncomp, ng_sol, MFInfo(), - *linop.Factory(alev)); + sol[alev] = linop.AnyMake(alev, 0, ng_sol); } - MultiFab::Copy(*sol_raii[alev], *a_sol[alev], 0, 0, ncomp, 0); - sol_raii[alev]->setBndry(0.0); - sol[alev] = sol_raii[alev].get(); + linop.AnyCopy(sol[alev], a_sol[alev], IntVect(0)); + linop.AnySetBndryToZero(sol[alev]); + sol_is_alias[alev] = false; } } @@ -1202,10 +887,9 @@ MLMG::prepareForSolve (const Vector& a_sol, const VectorboxArray(), a_rhs[alev]->DistributionMap(), ncomp, ng_rhs, - MFInfo(), *linop.Factory(alev)); + rhs[alev] = linop.AnyMake(alev, 0, ng_rhs); } - MultiFab::Copy(rhs[alev], *a_rhs[alev], 0, 0, ncomp, ng_rhs); + linop.AnyCopy(rhs[alev], a_rhs[alev], ng_rhs); linop.applyMetricTerm(alev, 0, rhs[alev]); linop.unimposeNeumannBC(alev, rhs[alev]); linop.applyInhomogNeumannTerm(alev, rhs[alev]); @@ -1215,38 +899,37 @@ MLMG::prepareForSolve (const Vector& a_sol, const Vector(linop.Factory(alev)); if (factory) { - Vector val(ncomp, 0.0); - amrex::EB_set_covered(rhs[alev], 0, ncomp, val); - amrex::EB_set_covered(*sol[alev], 0, ncomp, val); + linop.AnySetCoveredToZero(rhs[alev]); + linop.AnySetCoveredToZero(sol[alev]); } #endif } for (int falev = finest_amr_lev; falev > 0; --falev) { - linop.averageDownSolutionRHS(falev-1, *sol[falev-1], rhs[falev-1], *sol[falev], rhs[falev]); + linop.AnyAverageDownSolutionRHS(falev-1, sol[falev-1], rhs[falev-1], + sol[falev], rhs[falev]); } // enforce solvability if appropriate if (linop.isSingular(0) && linop.getEnforceSingularSolvable()) { - computeVolInv(); makeSolvable(); } IntVect ng = linop.isCellCentered() ? 
IntVect(0) : IntVect(1); if (cf_strategy == CFStrategy::ghostnodes) ng = ng_rhs; if (!solve_called) { - linop.make(res, ncomp, ng); - linop.make(rescor, ncomp, ng); + linop.make(res, ng); + linop.make(rescor, ng); } for (int alev = 0; alev <= finest_amr_lev; ++alev) { const int nmglevs = linop.NMGLevels(alev); for (int mglev = 0; mglev < nmglevs; ++mglev) { - res[alev][mglev].setVal(0.0); - rescor[alev][mglev].setVal(0.0); + linop.AnySetToZero(res [alev][mglev]); + linop.AnySetToZero(rescor[alev][mglev]); } } @@ -1261,12 +944,9 @@ MLMG::prepareForSolve (const Vector& a_sol, const Vector(res[alev][mglev].boxArray(), - res[alev][mglev].DistributionMap(), - ncomp, _ng, MFInfo(), - *linop.Factory(alev,mglev)); + cor[alev][mglev] = linop.AnyMake(alev, mglev, _ng); } - cor[alev][mglev]->setVal(0.0); + linop.AnySetToZero(cor[alev][mglev]); } } @@ -1280,12 +960,9 @@ MLMG::prepareForSolve (const Vector& a_sol, const Vector(cor[alev][mglev]->boxArray(), - cor[alev][mglev]->DistributionMap(), - ncomp, _ng, MFInfo(), - *linop.Factory(alev,mglev)); + cor_hold[alev][mglev] = linop.AnyMake(alev, mglev, _ng); } - cor_hold[alev][mglev]->setVal(0.0); + linop.AnySetToZero(cor_hold[alev][mglev]); } } for (int alev = 1; alev < finest_amr_lev; ++alev) @@ -1294,31 +971,9 @@ MLMG::prepareForSolve (const Vector& a_sol, const Vector(cor[alev][0]->boxArray(), - cor[alev][0]->DistributionMap(), - ncomp, _ng, MFInfo(), - *linop.Factory(alev,0)); - } - cor_hold[alev][0]->setVal(0.0); - } - - buildFineMask(); - - if (!solve_called) - { - scratch.resize(namrlevs); -#ifdef AMREX_USE_EB - if (linop.isCellCentered()) { - for (int alev=0; alev < namrlevs; ++alev) { - if (rhs[alev].hasEBFabFactory()) { - scratch[alev] = std::make_unique(rhs[alev].boxArray(), - rhs[alev].DistributionMap(), - ncomp, 0, MFInfo(), - *linop.Factory(alev)); - } - } + cor_hold[alev][0] = linop.AnyMake(alev, 0, _ng); } -#endif + linop.AnySetToZero(cor_hold[alev][0]); } if (linop.m_parent) { @@ -1379,7 +1034,7 @@ MLMG::getGradSolution (const Vector >& a_grad_so { BL_PROFILE("MLMG::getGradSolution()"); for (int alev = 0; alev <= finest_amr_lev; ++alev) { - linop.compGrad(alev, a_grad_sol[alev], *sol[alev], a_loc); + linop.compGrad(alev, a_grad_sol[alev], sol[alev].get(), a_loc); } } @@ -1392,7 +1047,11 @@ MLMG::getFluxes (const Vector >& a_flux, } AMREX_ASSERT(sol.size() == a_flux.size()); - getFluxes(a_flux, sol, a_loc); + Vector solmf; + for (auto & s : sol) { + solmf.push_back(&(s.get())); + } + getFluxes(a_flux, solmf, a_loc); } void @@ -1413,7 +1072,11 @@ void MLMG::getFluxes (const Vector & a_flux, Location a_loc) { AMREX_ASSERT(sol.size() == a_flux.size()); - getFluxes(a_flux, sol, a_loc); + Vector solmf; + for (auto & s : sol) { + solmf.push_back(&(s.get())); + } + getFluxes(a_flux, solmf, a_loc); } void @@ -1459,7 +1122,11 @@ MLMG::getEBFluxes (const Vector& a_eb_flux) } AMREX_ASSERT(sol.size() == a_eb_flux.size()); - getEBFluxes(a_eb_flux, sol); + Vector solmf; + for (auto & s : sol) { + solmf.push_back(&(s.get())); + } + getEBFluxes(a_eb_flux, solmf); } void @@ -1486,28 +1153,21 @@ MLMG::compResidual (const Vector& a_res, const Vector& a_s if (linop.hasHiddenDimension()) ng_sol[linop.hiddenDirection()] = 0; sol.resize(namrlevs); - sol_raii.resize(namrlevs); + sol_is_alias.resize(namrlevs,true); for (int alev = 0; alev < namrlevs; ++alev) { - if (cf_strategy == CFStrategy::ghostnodes) + if (cf_strategy == CFStrategy::ghostnodes || a_sol[alev]->nGrowVect() == ng_sol) { - sol[alev] = a_sol[alev]; - } - else if (a_sol[alev]->nGrowVect() == 
ng_sol) - { - sol[alev] = a_sol[alev]; + sol[alev] = linop.AnyMakeAlias(*a_sol[alev]); + sol_is_alias[alev] = true; } else { - if (sol_raii[alev] == nullptr) + if (sol_is_alias[alev]) { - sol_raii[alev] = std::make_unique(a_sol[alev]->boxArray(), - a_sol[alev]->DistributionMap(), - ncomp, ng_sol, MFInfo(), - *linop.Factory(alev)); + sol[alev] = linop.AnyMake(alev, 0, ng_sol); } - MultiFab::Copy(*sol_raii[alev], *a_sol[alev], 0, 0, ncomp, 0); - sol[alev] = sol_raii[alev].get(); + MultiFab::Copy(sol[alev].get(), *a_sol[alev], 0, 0, ncomp, 0); } } @@ -1521,22 +1181,23 @@ MLMG::compResidual (const Vector& a_res, const Vector& a_s const auto& amrrr = linop.AMRRefRatio(); for (int alev = finest_amr_lev; alev >= 0; --alev) { - const MultiFab* crse_bcdata = (alev > 0) ? sol[alev-1] : nullptr; + const MultiFab* crse_bcdata = (alev > 0) ? &(sol[alev-1].get()) : nullptr; const MultiFab* prhs = a_rhs[alev]; #if (AMREX_SPACEDIM != 3) int nghost = (cf_strategy == CFStrategy::ghostnodes) ? linop.getNGrow(alev) : 0; - MultiFab rhstmp(prhs->boxArray(), prhs->DistributionMap(), ncomp, nghost, - MFInfo(), *linop.Factory(alev)); + Any rhstmp_a(MultiFab(prhs->boxArray(), prhs->DistributionMap(), ncomp, nghost, + MFInfo(), *linop.Factory(alev))); + MultiFab& rhstmp = rhstmp_a.get(); MultiFab::Copy(rhstmp, *prhs, 0, 0, ncomp, nghost); - linop.applyMetricTerm(alev, 0, rhstmp); - linop.unimposeNeumannBC(alev, rhstmp); - linop.applyInhomogNeumannTerm(alev, rhstmp); + linop.applyMetricTerm(alev, 0, rhstmp_a); + linop.unimposeNeumannBC(alev, rhstmp_a); + linop.applyInhomogNeumannTerm(alev, rhstmp_a); prhs = &rhstmp; #endif - linop.solutionResidual(alev, *a_res[alev], *sol[alev], *prhs, crse_bcdata); + linop.solutionResidual(alev, *a_res[alev], sol[alev].get(), *prhs, crse_bcdata); if (alev < finest_amr_lev) { - linop.reflux(alev, *a_res[alev], *sol[alev], *prhs, - *a_res[alev+1], *sol[alev+1], *a_rhs[alev+1]); + linop.reflux(alev, *a_res[alev], sol[alev].get(), *prhs, + *a_res[alev+1], sol[alev+1].get(), *a_rhs[alev+1]); if (linop.isCellCentered()) { #ifdef AMREX_USE_EB amrex::EB_average_down(*a_res[alev+1], *a_res[alev], 0, ncomp, amrrr[alev]); @@ -1604,7 +1265,8 @@ MLMG::apply (const Vector& out, const Vector& a_in) } for (int alev = 0; alev < namrlevs; ++alev) { - linop.applyInhomogNeumannTerm(alev, rh[alev]); + Any a(MultiFab(rh[alev], amrex::make_alias, 0, rh[alev].nComp())); + linop.applyInhomogNeumannTerm(alev, a); } const auto& amrrr = linop.AMRRefRatio(); @@ -1637,215 +1299,45 @@ MLMG::apply (const Vector& out, const Vector& a_in) } } -void -MLMG::averageDownAndSync () -{ - const auto& amrrr = linop.AMRRefRatio(); - - int ncomp = linop.getNComp(); - int nghost = 0; - if (cf_strategy == CFStrategy::ghostnodes) nghost = linop.getNGrow(); - - if (linop.isCellCentered()) - { - for (int falev = finest_amr_lev; falev > 0; --falev) - { -#ifdef AMREX_USE_EB - amrex::EB_average_down(*sol[falev], *sol[falev-1], 0, ncomp, amrrr[falev-1]); -#else - amrex::average_down(*sol[falev], *sol[falev-1], 0, ncomp, amrrr[falev-1]); -#endif - } - } - else - { - linop.nodalSync(finest_amr_lev, 0, *sol[finest_amr_lev]); - - for (int falev = finest_amr_lev; falev > 0; --falev) - { - const auto& fmf = *sol[falev]; - auto& cmf = *sol[falev-1]; - - MultiFab tmpmf(amrex::coarsen(fmf.boxArray(), amrrr[falev-1]), fmf.DistributionMap(), ncomp, nghost); - amrex::average_down(fmf, tmpmf, 0, ncomp, amrrr[falev-1]); - cmf.ParallelCopy(tmpmf, 0, 0, ncomp); - linop.nodalSync(falev-1, 0, cmf); - } - } -} - -void -MLMG::computeVolInv () -{ - if 
(solve_called) return; - - if (linop.isCellCentered()) - { - volinv.resize(namrlevs); - for (int amrlev = 0; amrlev < namrlevs; ++amrlev) { - volinv[amrlev].resize(linop.NMGLevels(amrlev)); - } - - // We don't need to compute for every level - - auto f = [&] (int amrlev, int mglev) { -#ifdef AMREX_USE_EB - auto factory = dynamic_cast(linop.Factory(amrlev,mglev)); - if (factory) - { - const MultiFab& vfrac = factory->getVolFrac(); - volinv[amrlev][mglev] = vfrac.sum(0,true); - } - else -#endif - { - volinv[amrlev][mglev] - = Real(1.0 / linop.compactify(linop.Geom(amrlev,mglev).Domain()).d_numPts()); - } - }; - - // amrlev = 0, mglev = 0 - f(0,0); - - int mgbottom = linop.NMGLevels(0)-1; - f(0,mgbottom); - -#ifdef AMREX_USE_EB - Real temp1, temp2; - if (rhs[0].hasEBFabFactory()) - { - ParallelAllReduce::Sum({volinv[0][0], volinv[0][mgbottom]}, - ParallelContext::CommunicatorSub()); - temp1 = Real(1.0)/volinv[0][0]; - temp2 = Real(1.0)/volinv[0][mgbottom]; - } - else - { - temp1 = volinv[0][0]; - temp2 = volinv[0][mgbottom]; - } - volinv[0][0] = temp1; - volinv[0][mgbottom] = temp2; -#endif - } -} - void MLMG::makeSolvable () { - const int ncomp = linop.getNComp(); - - if (linop.isCellCentered()) - { - Vector offset(ncomp); -#ifdef AMREX_USE_EB - auto factory = dynamic_cast(linop.Factory(0)); - if (factory) - { - const MultiFab& vfrac = factory->getVolFrac(); - for (int c = 0; c < ncomp; ++c) { - offset[c] = MultiFab::Dot(rhs[0], c, vfrac, 0, 1, 0, true) * volinv[0][0]; - } - } - else -#endif - { - for (int c = 0; c < ncomp; ++c) { - offset[c] = rhs[0].sum(c,true) * volinv[0][0]; - } - } - ParallelAllReduce::Sum(offset.data(), ncomp, ParallelContext::CommunicatorSub()); - if (verbose >= 4) { - for (int c = 0; c < ncomp; ++c) { - amrex::Print() << "MLMG: Subtracting " << offset[c] - << " from rhs component " << c << "\n"; - } - } - for (int alev = 0; alev < namrlevs; ++alev) { - for (int c = 0; c < ncomp; ++c) { - rhs[alev].plus(-offset[c], c, 1); - } -#ifdef AMREX_USE_EB - if (rhs[alev].hasEBFabFactory()) { - Vector val(ncomp, 0.0); - amrex::EB_set_covered(rhs[alev], 0, ncomp, val); - } -#endif + auto const& offset = linop.getSolvabilityOffset(0, 0, rhs[0]); + if (verbose >= 4) { + const int ncomp = offset.size(); + for (int c = 0; c < ncomp; ++c) { + amrex::Print() << "MLMG: Subtracting " << offset[c] << " from rhs component " + << c << "\n"; } } - else - { - AMREX_ASSERT_WITH_MESSAGE(ncomp==1, "ncomp > 1 not supported for singular nodal problem"); - Real offset = linop.getSolvabilityOffset(0, 0, rhs[0]); - if (verbose >= 4) { - amrex::Print() << "MLMG: Subtracting " << offset << " from rhs\n"; - } - for (int alev = 0; alev < namrlevs; ++alev) { - linop.fixSolvabilityByOffset(alev, 0, rhs[alev], offset); - } + for (int alev = 0; alev < namrlevs; ++alev) { + linop.fixSolvabilityByOffset(alev, 0, rhs[alev], offset); } } void -MLMG::makeSolvable (int amrlev, int mglev, MultiFab& mf) +MLMG::makeSolvable (int amrlev, int mglev, Any& mf) { - const int ncomp = linop.getNComp(); - - if (linop.isCellCentered()) - { - Vector offset(ncomp); -#ifdef AMREX_USE_EB - auto factory = dynamic_cast(linop.Factory(amrlev,mglev)); - if (factory) - { - const MultiFab& vfrac = factory->getVolFrac(); - for (int c = 0; c < ncomp; ++c) { - offset[c] = MultiFab::Dot(mf, c, vfrac, 0, 1, 0, true) * volinv[amrlev][mglev]; - } - } - else -#endif - { - for (int c = 0; c < ncomp; ++c) { - offset[c] = mf.sum(c,true) * volinv[amrlev][mglev]; - } - } - - ParallelAllReduce::Sum(offset.data(), ncomp, 
ParallelContext::CommunicatorSub()); - - if (verbose >= 4) { - for (int c = 0; c < ncomp; ++c) { - amrex::Print() << "MLMG: Subtracting " << offset[c] - << " from mf component c = " << c << "\n"; - } - } - + auto const& offset = linop.getSolvabilityOffset(amrlev, mglev, mf); + if (verbose >= 4) { + const int ncomp = offset.size(); for (int c = 0; c < ncomp; ++c) { - mf.plus(-offset[c], c, 1); + amrex::Print() << "MLMG: Subtracting " << offset[c] + << " from mf component c = " << c + << " on level (" << amrlev << ", " << mglev << ")\n"; } -#ifdef AMREX_USE_EB - if (mf.hasEBFabFactory()) { - Vector val(ncomp, 0.0); - amrex::EB_set_covered(mf, 0, ncomp, val); - } -#endif - } - else - { - AMREX_ASSERT_WITH_MESSAGE(ncomp==1, "ncomp > 1 not supported for singular nodal problem"); - Real offset = linop.getSolvabilityOffset(amrlev, mglev, mf); - if (verbose >= 4) { - amrex::Print() << "MLMG: Subtracting " << offset << " on level (" << amrlev << ", " - << mglev << ")\n"; - } - linop.fixSolvabilityByOffset(amrlev, mglev, mf, offset); } + linop.fixSolvabilityByOffset(amrlev, mglev, mf, offset); } #if defined(AMREX_USE_HYPRE) && (AMREX_SPACEDIM > 1) void -MLMG::bottomSolveWithHypre (MultiFab& x, const MultiFab& b) +MLMG::bottomSolveWithHypre (Any& a_x, const Any& a_b) { + AMREX_ASSERT(a_x.is()); + MultiFab& x = a_x.get(); + MultiFab const& b = a_b.get(); + const int amrlev = 0; const int mglev = linop.NMGLevels(amrlev) - 1; @@ -1905,18 +1397,21 @@ MLMG::bottomSolveWithHypre (MultiFab& x, const MultiFab& b) // For precision reasons we enforce that the average of the correction from hypre is 0 if (linop.isSingular(amrlev) && linop.getEnforceSingularSolvable()) { - makeSolvable(amrlev, mglev, x); + makeSolvable(amrlev, mglev, a_x); } } #endif void -MLMG::bottomSolveWithPETSc (MultiFab& x, const MultiFab& b) +MLMG::bottomSolveWithPETSc (Any& a_x, const Any& a_b) { #if !defined(AMREX_USE_PETSC) - amrex::ignore_unused(x,b); + amrex::ignore_unused(a_x,a_b); amrex::Abort("bottomSolveWithPETSc is called without building with PETSc"); #else + AMREX_ASSERT(a_x.is()); + MultiFab& x = a_x.get(); + MultiFab const& b = a_b.get(); const int ncomp = linop.getNComp(); AMREX_ALWAYS_ASSERT_WITH_MESSAGE(ncomp == 1, "bottomSolveWithPETSc doesn't work with ncomp > 1"); diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian.H b/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian.H index affe4c73eaf..50f20e22915 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian.H @@ -116,9 +116,11 @@ public : } virtual void getFluxes (const Vector& a_flux, const Vector& a_sol) const final override; - virtual void unimposeNeumannBC (int amrlev, MultiFab& rhs) const final override; - virtual Real getSolvabilityOffset (int amrlev, int mglev, MultiFab const& rhs) const override; - virtual void fixSolvabilityByOffset (int amrlev, int mglev, MultiFab& rhs, Real offset) const override; + virtual void unimposeNeumannBC (int amrlev, Any& rhs) const final override; + virtual Vector getSolvabilityOffset (int amrlev, int mglev, + Any const& rhs) const override; + virtual void fixSolvabilityByOffset (int amrlev, int mglev, Any& rhs, + Vector const& offset) const override; virtual void compGrad (int /*amrlev*/, const Array& /*grad*/, MultiFab& /*sol*/, Location /*loc*/) const final override { diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian.cpp b/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian.cpp index 79358b58898..c0efaed25d6 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian.cpp +++ 
b/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian.cpp @@ -150,13 +150,16 @@ MLNodeLaplacian::resizeMultiGrid (int new_size) } void -MLNodeLaplacian::unimposeNeumannBC (int amrlev, MultiFab& rhs) const +MLNodeLaplacian::unimposeNeumannBC (int amrlev, Any& a_rhs) const { if (m_coarsening_strategy == CoarseningStrategy::RAP) { const Box& nddom = amrex::surroundingNodes(Geom(amrlev).Domain()); const auto lobc = LoBC(); const auto hibc = HiBC(); + AMREX_ASSERT(a_rhs.is()); + MultiFab& rhs = a_rhs.get(); + MFItInfo mfi_info; if (Gpu::notInLaunchRegion()) mfi_info.EnableTiling().SetDynamic(true); #ifdef AMREX_USE_OMP @@ -171,14 +174,17 @@ MLNodeLaplacian::unimposeNeumannBC (int amrlev, MultiFab& rhs) const } } -Real -MLNodeLaplacian::getSolvabilityOffset (int amrlev, int mglev, MultiFab const& rhs) const +Vector +MLNodeLaplacian::getSolvabilityOffset (int amrlev, int mglev, Any const& a_rhs) const { amrex::ignore_unused(amrlev); - AMREX_ASSERT(amrlev==0); - AMREX_ASSERT(mglev+1==m_num_mg_levels[0] || mglev==0); + AMREX_ASSERT(amrlev==0 && (mglev+1==m_num_mg_levels[0] || mglev==0)); + AMREX_ASSERT(getNComp() == 1); if (m_coarsening_strategy == CoarseningStrategy::RAP) { + AMREX_ASSERT(a_rhs.is()); + auto const& rhs = a_rhs.get(); + #ifdef AMREX_USE_EB auto factory = dynamic_cast(m_factory[amrlev][0].get()); if (mglev == 0 && factory && !factory->isAllRegular()) { @@ -229,7 +235,7 @@ MLNodeLaplacian::getSolvabilityOffset (int amrlev, int mglev, MultiFab const& rh Real s1 = amrex::get<0>(r); Real s2 = amrex::get<1>(r); ParallelAllReduce::Sum({s1,s2}, ParallelContext::CommunicatorSub()); - return s1/s2; + return {s1/s2}; } else #endif { @@ -279,16 +285,21 @@ MLNodeLaplacian::getSolvabilityOffset (int amrlev, int mglev, MultiFab const& rh Real s1 = amrex::get<0>(r); Real s2 = amrex::get<1>(r); ParallelAllReduce::Sum({s1,s2}, ParallelContext::CommunicatorSub()); - return s1/s2; + return {s1/s2}; } } else { - return MLNodeLinOp::getSolvabilityOffset(amrlev, mglev, rhs); + return MLNodeLinOp::getSolvabilityOffset(amrlev, mglev, a_rhs); } } void -MLNodeLaplacian::fixSolvabilityByOffset (int amrlev, int mglev, MultiFab& rhs, Real offset) const +MLNodeLaplacian::fixSolvabilityByOffset (int amrlev, int mglev, Any& a_rhs, + Vector const& a_offset) const { + AMREX_ASSERT(a_rhs.is()); + auto& rhs = a_rhs.get(); + Real offset = a_offset[0]; + if (m_coarsening_strategy == CoarseningStrategy::RAP) { #ifdef AMREX_USE_EB auto factory = dynamic_cast(m_factory[amrlev][0].get()); diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian_misc.cpp b/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian_misc.cpp index df5ab489d2f..339ca98e072 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian_misc.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian_misc.cpp @@ -26,7 +26,11 @@ MLNodeLaplacian::averageDownCoeffs () { for (int mglev = 0; mglev < m_num_mg_levels[amrlev]; ++mglev) { +#if (AMREX_SPACEDIM == 1) + int ndims = 1; +#else int ndims = (m_use_harmonic_average || m_use_mapped) ? AMREX_SPACEDIM : 1; +#endif for (int idim = 0; idim < ndims; ++idim) { if (m_sigma[amrlev][mglev][idim] == nullptr) { @@ -101,7 +105,11 @@ MLNodeLaplacian::averageDownCoeffsSameAmrLevel (int amrlev) if (m_coarsening_strategy != CoarseningStrategy::Sigma) return; +#if (AMREX_SPACEDIM == 1) + const int nsigma = 1; +#else const int nsigma = (m_use_harmonic_average || m_use_mapped) ? 
AMREX_SPACEDIM : 1; +#endif for (int mglev = 1; mglev < m_num_mg_levels[amrlev]; ++mglev) { diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.H b/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.H index c46f4a250f2..1935be89f1d 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.H @@ -36,10 +36,6 @@ public: const Vector const*>& a_factory = {}, int a_eb_limit_coarsening = -1); - virtual void setLevelBC (int /*amrlev*/, const MultiFab* /*levelbcdata*/, - const MultiFab* = nullptr, const MultiFab* = nullptr, - const MultiFab* = nullptr) final override {} - virtual void apply (int amrlev, int mglev, MultiFab& out, MultiFab& in, BCMode bc_mode, StateMode s_mode, const MLMGBndry* bndry=nullptr) const final override; @@ -59,20 +55,15 @@ public: amrex::Abort("AMReX_MLNodeLinOp::compGrad::How did we get here?"); } - virtual void applyMetricTerm (int /*amrlev*/, int /*mglev*/, MultiFab& /*rhs*/) const final override {} + virtual void applyMetricTerm (int /*amrlev*/, int /*mglev*/, Any& /*rhs*/) const final override {} virtual void unapplyMetricTerm (int /*amrlev*/, int /*mglev*/, MultiFab& /*rhs*/) const final override {} - virtual void fillSolutionBC (int /*amrlev*/, MultiFab& /*sol*/, - const MultiFab* /*crse_bcdata*/=nullptr) final override { - amrex::Abort("AMReX_MLNodeLinOp::fillSolutionBC::How did we get here?"); - } - - virtual void applyInhomogNeumannTerm (int amrlev, MultiFab& rhs) const override; + virtual Vector getSolvabilityOffset (int amrlev, int mglev, + Any const& rhs) const override; + virtual void fixSolvabilityByOffset (int amrlev, int mglev, Any& rhs, + Vector const& offset) const override; - virtual Real getSolvabilityOffset (int amrlev, int mglev, MultiFab const& rhs) const override; - virtual void fixSolvabilityByOffset (int amrlev, int mglev, MultiFab& rhs, Real offset) const override; - - virtual void prepareForSolve () override {} + virtual void prepareForSolve () override; virtual bool isSingular (int amrlev) const override { return (amrlev == 0) ? m_is_bottom_singular : false; } @@ -86,7 +77,7 @@ public: virtual void Fapply (int amrlev, int mglev, MultiFab& out, const MultiFab& in) const = 0; virtual void Fsmooth (int amrlev, int mglev, MultiFab& sol, const MultiFab& rsh) const = 0; - virtual void nodalSync (int amrlev, int mglev, MultiFab& mf) const final override; + void nodalSync (int amrlev, int mglev, MultiFab& mf) const; virtual std::unique_ptr makeNLinOp (int /*grid_size*/) const final override { amrex::Abort("MLNodeLinOp::makeNLinOp: N-Solve not supported"); @@ -102,6 +93,19 @@ public: // omask is either 0 or 1. 1 means the node is an unknown. 0 means it's known. 
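+    // A minimal usage sketch (illustration only, not part of this patch;
+    // `nba`, `dm`, and `linop` below are hypothetical): mark every node as
+    // an unknown, then zero the nodes whose values are known before handing
+    // the mask to the solver.
+    //
+    //   iMultiFab omask(nba, dm, 1, 0); // nodal BoxArray, DistributionMapping
+    //   omask.setVal(1);                // every node starts as an unknown
+    //   // ... set omask to 0 on nodes whose values are known ...
+    //   linop.setOversetMask(0, omask); // amrlev = 0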
void setOversetMask (int amrlev, const iMultiFab& a_omask); + virtual void fixUpResidualMask (int /*amrlev*/, iMultiFab& /*resmsk*/) { } + + virtual Real AnyNormInfMask (int amrlev, Any const& a, bool local) const override; + + virtual void AnyAvgDownResAmr (int, Any&, Any const&) const final override { } + + virtual void AnyInterpolationAmr (int famrlev, Any& fine, const Any& crse, + IntVect const& nghost) const override; + + virtual void AnyAverageDownAndSync (Vector& sol) const override; + + virtual void interpAssign (int amrlev, int fmglev, MultiFab& fine, MultiFab& crse) const override; + #if defined(AMREX_USE_HYPRE) && (AMREX_SPACEDIM > 1) virtual std::unique_ptr makeHypreNodeLap( int bottom_verbose, @@ -139,6 +143,8 @@ protected: MultiFab m_bottom_dot_mask; MultiFab m_coarse_dot_mask; + Vector > m_norm_fine_mask; + #ifdef AMREX_USE_EB CoarseningStrategy m_coarsening_strategy = CoarseningStrategy::RAP; #else diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.cpp b/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.cpp index baf0f5edb42..b5173b71f5f 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #ifdef AMREX_USE_OMP @@ -83,6 +84,22 @@ MLNodeLinOp::define (const Vector& a_geom, m_has_fine_bndry[amrlev] = std::make_unique >(m_grids[amrlev][0], m_dmap[amrlev][0]); } + + m_norm_fine_mask.resize(m_num_amr_levels-1); + for (int amrlev = 0; amrlev < m_num_amr_levels-1; ++amrlev) { + m_norm_fine_mask[amrlev] = std::make_unique + (makeFineMask(amrex::convert(m_grids[amrlev][0], IntVect(1)), m_dmap[amrlev][0], + amrex::convert(m_grids[amrlev+1][0], IntVect(1)), + IntVect(m_amr_ref_ratio[amrlev]), 1, 0)); + } +} + +void +MLNodeLinOp::prepareForSolve () +{ + for (int amrlev = 0; amrlev < m_num_amr_levels-1; ++amrlev) { + fixUpResidualMask(amrlev, *m_norm_fine_mask[amrlev]); + } } std::unique_ptr @@ -177,17 +194,16 @@ MLNodeLinOp::xdoty (int amrlev, int mglev, const MultiFab& x, const MultiFab& y, return result; } -void -MLNodeLinOp::applyInhomogNeumannTerm (int /*amrlev*/, MultiFab& /*rhs*/) const -{ -} - -Real -MLNodeLinOp::getSolvabilityOffset (int amrlev, int mglev, MultiFab const& rhs) const +Vector +MLNodeLinOp::getSolvabilityOffset (int amrlev, int mglev, Any const& a_rhs) const { amrex::ignore_unused(amrlev); - AMREX_ASSERT(amrlev==0); - AMREX_ASSERT(mglev+1==m_num_mg_levels[0] || mglev==0); + AMREX_ASSERT(amrlev==0 && (mglev+1==m_num_mg_levels[0] || mglev==0)); + AMREX_ASSERT(getNComp() == 1); + + AMREX_ASSERT(a_rhs.is()); + auto const& rhs = a_rhs.get(); + const auto& mask = (mglev+1 == m_num_mg_levels[0]) ? 
m_bottom_dot_mask : m_coarse_dot_mask; const auto& mask_ma = mask.const_arrays(); const auto& rhs_ma = rhs.const_arrays(); @@ -203,13 +219,16 @@ MLNodeLinOp::getSolvabilityOffset (int amrlev, int mglev, MultiFab const& rhs) c Real s1 = amrex::get<0>(r); Real s2 = amrex::get<1>(r); ParallelAllReduce::Sum({s1,s2}, ParallelContext::CommunicatorSub()); - return s1/s2; + return {s1/s2}; } void -MLNodeLinOp::fixSolvabilityByOffset (int /*amrlev*/, int /*mglev*/, MultiFab& rhs, Real offset) const +MLNodeLinOp::fixSolvabilityByOffset (int /*amrlev*/, int /*mglev*/, Any& a_rhs, + Vector const& offset) const { - rhs.plus(-offset, 0, 1); + AMREX_ASSERT(a_rhs.is()); + auto& rhs = a_rhs.get(); + rhs.plus(-offset[0], 0, 1); } namespace { @@ -448,6 +467,119 @@ MLNodeLinOp::resizeMultiGrid (int new_size) MLLinOp::resizeMultiGrid(new_size); } +Real +MLNodeLinOp::AnyNormInfMask (int amrlev, Any const& a, bool local) const +{ + AMREX_ASSERT(a.is()); + auto& mf = a.get(); + + const int finest_level = NAMRLevels() - 1; + iMultiFab const* fine_mask = (amrlev == finest_level) + ? nullptr : m_norm_fine_mask[amrlev].get(); + return MFNormInf(mf, fine_mask, local); +} + +void +MLNodeLinOp::AnyInterpolationAmr (int famrlev, Any& a_fine, const Any& a_crse, + IntVect const& nghost) const +{ + AMREX_ASSERT(a_fine.is()); + MultiFab& fine = a_fine.get(); + MultiFab const& crse = a_crse.get(); + + const int ncomp = getNComp(); + const int refratio = AMRRefRatio(famrlev-1); + + AMREX_ALWAYS_ASSERT(refratio == 2 || refratio == 4); +#ifdef AMREX_USE_OMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for (MFIter mfi(fine, TilingIfNotGPU()); mfi.isValid(); ++mfi) + { + Box fbx = mfi.tilebox(); + fbx.grow(nghost); + Array4 const& ffab = fine.array(mfi); + Array4 const& cfab = crse.const_array(mfi); + + if (refratio == 2) { + AMREX_HOST_DEVICE_FOR_4D ( fbx, ncomp, i, j, k, n, + { + mlmg_lin_nd_interp_r2(i,j,k,n,ffab,cfab); + }); + } else { + AMREX_HOST_DEVICE_FOR_4D ( fbx, ncomp, i, j, k, n, + { + mlmg_lin_nd_interp_r4(i,j,k,n,ffab,cfab); + }); + } + } +} + +void +MLNodeLinOp::AnyAverageDownAndSync (Vector& sol) const +{ + AMREX_ASSERT(sol[0].is()); + + const int ncomp = getNComp(); + const int finest_amr_lev = NAMRLevels() - 1; + + nodalSync(finest_amr_lev, 0, sol[finest_amr_lev].get()); + + for (int falev = finest_amr_lev; falev > 0; --falev) + { + const auto& fmf = sol[falev ].get(); + auto& cmf = sol[falev-1].get(); + + auto rr = AMRRefRatio(falev-1); + MultiFab tmpmf(amrex::coarsen(fmf.boxArray(), rr), fmf.DistributionMap(), ncomp, 0); + amrex::average_down(fmf, tmpmf, 0, ncomp, rr); + cmf.ParallelCopy(tmpmf, 0, 0, ncomp); + nodalSync(falev-1, 0, cmf); + } +} + +void +MLNodeLinOp::interpAssign (int amrlev, int fmglev, MultiFab& fine, MultiFab& crse) const +{ + const int ncomp = getNComp(); + + const Geometry& crse_geom = Geom(amrlev,fmglev+1); + const IntVect refratio = (amrlev > 0) ? 
IntVect(2) : mg_coarsen_ratio_vec[fmglev]; + AMREX_ALWAYS_ASSERT(refratio == 2); + + MultiFab cfine; + const MultiFab* cmf; + + if (amrex::isMFIterSafe(crse, fine)) + { + crse.FillBoundary(crse_geom.periodicity()); + cmf = &crse; + } + else + { + BoxArray cba = fine.boxArray(); + cba.coarsen(refratio); + cfine.define(cba, fine.DistributionMap(), ncomp, 0); + cfine.ParallelCopy(crse, 0, 0, ncomp, 0, 0, crse_geom.periodicity()); + cmf = & cfine; + } + +#ifdef AMREX_USE_OMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for (MFIter mfi(fine, TilingIfNotGPU()); mfi.isValid(); ++mfi) + { + const Box& fbx = mfi.tilebox(); + Array4 const& ffab = fine.array(mfi); + Array4 const& cfab = cmf->const_array(mfi); + + AMREX_HOST_DEVICE_FOR_4D ( fbx, ncomp, i, j, k, n, + { + mlmg_lin_nd_interp_r2(i,j,k,n,ffab,cfab); + }); + } +} + #if defined(AMREX_USE_HYPRE) && (AMREX_SPACEDIM > 1) std::unique_ptr MLNodeLinOp::makeHypreNodeLap (int bottom_verbose, const std::string& options_namespace) const diff --git a/Src/LinearSolvers/MLMG/AMReX_MLPoisson.H b/Src/LinearSolvers/MLMG/AMReX_MLPoisson.H index 81dd431d953..41f8fbf1cae 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLPoisson.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLPoisson.H @@ -70,6 +70,10 @@ public: virtual void copyNSolveSolution (MultiFab& dst, MultiFab const& src) const final override; + //! Compute dphi/dn on domain faces after the solver has converged. + void get_dpdn_on_domain_faces (Array const& dpdn, + MultiFab const& phi); + private: Vector m_is_singular; diff --git a/Src/LinearSolvers/MLMG/AMReX_MLPoisson.cpp b/Src/LinearSolvers/MLMG/AMReX_MLPoisson.cpp index ce27eb936fd..15ee75e961a 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLPoisson.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLPoisson.cpp @@ -702,4 +702,63 @@ MLPoisson::copyNSolveSolution (MultiFab& dst, MultiFab const& src) const dst.ParallelCopy(src); } +void +MLPoisson::get_dpdn_on_domain_faces (Array const& dpdn, + MultiFab const& phi) +{ + BL_PROFILE("MLPoisson::dpdn_faces()"); + + // We do not need to call applyBC because this function is used by the + // OpenBC solver after the solver has converged. That means the BC has + // already been filled to check the residual. + + Box const& domain0 = m_geom[0][0].Domain(); + AMREX_D_TERM(const Real dxi = m_geom[0][0].InvCellSize(0);, + const Real dyi = m_geom[0][0].InvCellSize(1);, + const Real dzi = m_geom[0][0].InvCellSize(2);) + +#ifdef AMREX_USE_OMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for (MFIter mfi(phi); mfi.isValid(); ++mfi) + { + Box const& vbx = mfi.validbox(); + for (OrientationIter oit; oit.isValid(); ++oit) { + Orientation face = oit(); + if (vbx[face] == domain0[face]) { + int dir = face.coordDir(); + Array4 const& p = phi.const_array(mfi); + Array4 const& gp = dpdn[dir]->array(mfi); + Box const& b2d = amrex::bdryNode(vbx,face); + if (dir == 0) { + // because it's dphi/dn, not dphi/dx. + Real fac = dxi * (face.isLow() ? -1.0_rt : 1._rt); + AMREX_HOST_DEVICE_PARALLEL_FOR_3D(b2d, i, j, k, + { + gp(i,j,k) = fac * (p(i,j,k) - p(i-1,j,k)); + }); + } +#if (AMREX_SPACEDIM > 1) + else if (dir == 1) { + Real fac = dyi * (face.isLow() ? -1.0_rt : 1._rt); + AMREX_HOST_DEVICE_PARALLEL_FOR_3D(b2d, i, j, k, + { + gp(i,j,k) = fac * (p(i,j,k) - p(i,j-1,k)); + }); + } +#if (AMREX_SPACEDIM > 2) + else { + Real fac = dzi * (face.isLow() ?
-1.0_rt : 1._rt); + AMREX_HOST_DEVICE_PARALLEL_FOR_3D(b2d, i, j, k, + { + gp(i,j,k) = fac * (p(i,j,k) - p(i,j,k-1)); + }); + } +#endif +#endif + } + } + } +} + } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLTensorOp.cpp b/Src/LinearSolvers/MLMG/AMReX_MLTensorOp.cpp index d4e77f312dc..0750ffdd969 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLTensorOp.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLTensorOp.cpp @@ -210,9 +210,16 @@ MLTensorOp::apply (int amrlev, int mglev, MultiFab& out, MultiFab& in, BCMode bc if (mglev >= m_kappa[amrlev].size()) return; - applyBCTensor(amrlev, mglev, in, bc_mode, s_mode, bndry ); + applyBCTensor(amrlev, mglev, in, bc_mode, s_mode, bndry); + + const auto& bcondloc = *m_bcondloc[amrlev][mglev]; + + Array4 foo; const auto dxinv = m_geom[amrlev][mglev].InvCellSizeArray(); + const Box& domain = m_geom[amrlev][mglev].growPeriodicDomain(1); + const auto dlo = amrex::lbound(domain); + const auto dhi = amrex::ubound(domain); Array const& etamf = m_b_coeffs[amrlev][mglev]; Array const& kapmf = m_kappa[amrlev][mglev]; @@ -247,20 +254,65 @@ MLTensorOp::apply (int amrlev, int mglev, MultiFab& out, MultiFab& in, BCMode bc Array4 const fyfab = fluxfab_tmp[1].array();, Array4 const fzfab = fluxfab_tmp[2].array();); - AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM - ( xbx, txbx, - { - mltensor_cross_terms_fx(txbx,fxfab,vfab,etaxfab,kapxfab,dxinv); - } - , ybx, tybx, - { - mltensor_cross_terms_fy(tybx,fyfab,vfab,etayfab,kapyfab,dxinv); - } - , zbx, tzbx, - { - mltensor_cross_terms_fz(tzbx,fzfab,vfab,etazfab,kapzfab,dxinv); - } - ); + if (domain.strictly_contains(bx)) { + AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM + ( xbx, txbx, + { + mltensor_cross_terms_fx(txbx,fxfab,vfab,etaxfab,kapxfab,dxinv); + } + , ybx, tybx, + { + mltensor_cross_terms_fy(tybx,fyfab,vfab,etayfab,kapyfab,dxinv); + } + , zbx, tzbx, + { + mltensor_cross_terms_fz(tzbx,fzfab,vfab,etazfab,kapzfab,dxinv); + } + ); + } else { + const auto & bdcv = bcondloc.bndryConds(mfi); + + Array2D bct; + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + for (OrientationIter face; face; ++face) { + Orientation ori = face(); + bct(ori,icomp) = bdcv[icomp][ori]; + } + } + + const auto& bvxlo = (bndry != nullptr) ? + (*bndry)[Orientation(0,Orientation::low )].array(mfi) : foo; + const auto& bvylo = (bndry != nullptr) ? + (*bndry)[Orientation(1,Orientation::low )].array(mfi) : foo; + const auto& bvxhi = (bndry != nullptr) ? + (*bndry)[Orientation(0,Orientation::high)].array(mfi) : foo; + const auto& bvyhi = (bndry != nullptr) ? + (*bndry)[Orientation(1,Orientation::high)].array(mfi) : foo; +#if (AMREX_SPACEDIM == 3) + const auto& bvzlo = (bndry != nullptr) ? + (*bndry)[Orientation(2,Orientation::low )].array(mfi) : foo; + const auto& bvzhi = (bndry != nullptr) ? 
+ (*bndry)[Orientation(2,Orientation::high)].array(mfi) : foo; +#endif + + AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM + ( xbx, txbx, + { + mltensor_cross_terms_fx(txbx,fxfab,vfab,etaxfab,kapxfab,dxinv, + bvxlo, bvxhi, bct, dlo, dhi); + } + , ybx, tybx, + { + mltensor_cross_terms_fy(tybx,fyfab,vfab,etayfab,kapyfab,dxinv, + bvylo, bvyhi, bct, dlo, dhi); + } + , zbx, tzbx, + { + mltensor_cross_terms_fz(tzbx,fzfab,vfab,etazfab,kapzfab,dxinv, + bvzlo, bvzhi, bct, dlo, dhi); + } + ); + } if (m_overset_mask[amrlev][mglev]) { const auto& osm = m_overset_mask[amrlev][mglev]->array(mfi); @@ -288,18 +340,18 @@ MLTensorOp::applyBCTensor (int amrlev, int mglev, MultiFab& vel, #if (AMREX_SPACEDIM == 1) amrex::ignore_unused(amrlev,mglev,vel,bc_mode,bndry); #else + const int inhomog = bc_mode == BCMode::Inhomogeneous; const int imaxorder = maxorder; const auto& bcondloc = *m_bcondloc[amrlev][mglev]; const auto& maskvals = m_maskvals[amrlev][mglev]; - FArrayBox foofab(Box::TheUnitBox(),3); - const auto& foo = foofab.array(); + Array4 foo; const auto dxinv = m_geom[amrlev][mglev].InvCellSizeArray(); const Box& domain = m_geom[amrlev][mglev].growPeriodicDomain(1); - - // Domain and coarse-fine boundaries are handled below. + const auto dlo = amrex::lbound(domain); + const auto dhi = amrex::ubound(domain); MFItInfo mfi_info; if (Gpu::notInLaunchRegion()) mfi_info.SetDynamic(true); @@ -315,14 +367,13 @@ MLTensorOp::applyBCTensor (int amrlev, int mglev, MultiFab& vel, const auto & bdlv = bcondloc.bndryLocs(mfi); const auto & bdcv = bcondloc.bndryConds(mfi); - GpuArray bct; - GpuArray bcl; - for (OrientationIter face; face; ++face) { - Orientation ori = face(); - const int iface = ori; - for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { - bct[iface*AMREX_SPACEDIM+icomp] = bdcv[icomp][ori]; - bcl[iface*AMREX_SPACEDIM+icomp] = bdlv[icomp][ori]; + Array2D bct; + Array2D bcl; + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + for (OrientationIter face; face; ++face) { + Orientation ori = face(); + bct(ori,icomp) = bdcv[icomp][ori]; + bcl(ori,icomp) = bdlv[icomp][ori]; } } @@ -341,14 +392,13 @@ MLTensorOp::applyBCTensor (int amrlev, int mglev, MultiFab& vel, (*bndry)[Orientation(1,Orientation::high)].array(mfi) : foo; #if (AMREX_SPACEDIM == 2) - AMREX_HOST_DEVICE_FOR_1D ( 4, icorner, { mltensor_fill_corners(icorner, vbx, velfab, mxlo, mylo, mxhi, myhi, bvxlo, bvylo, bvxhi, bvyhi, bct, bcl, inhomog, imaxorder, - dxinv, domain); + dxinv, dlo, dhi); }); #else const auto& mzlo = maskvals[Orientation(2,Orientation::low )].array(mfi); @@ -360,18 +410,40 @@ MLTensorOp::applyBCTensor (int amrlev, int mglev, MultiFab& vel, (*bndry)[Orientation(2,Orientation::high)].array(mfi) : foo; // only edge vals used in 3D stencil - AMREX_HOST_DEVICE_FOR_1D ( 12, iedge, +#ifdef AMREX_USE_GPU + if (Gpu::inLaunchRegion()) { + amrex::launch(12, 64, Gpu::gpuStream(), +#ifdef AMREX_USE_DPCPP + [=] AMREX_GPU_DEVICE (sycl::nd_item<1> const& item) + { + int bid = item.get_group_linear_id(); + int tid = item.get_local_linear_id(); + int bdim = item.get_local_range(0); +#else + [=] AMREX_GPU_DEVICE () + { + int bid = blockIdx.x; + int tid = threadIdx.x; + int bdim = blockDim.x; +#endif + mltensor_fill_edges(bid, tid, bdim, vbx, velfab, + mxlo, mylo, mzlo, mxhi, myhi, mzhi, + bvxlo, bvylo, bvzlo, bvxhi, bvyhi, bvzhi, + bct, bcl, inhomog, imaxorder, + dxinv, dlo, dhi); + }); + } else +#endif { - mltensor_fill_edges(iedge, vbx, velfab, + mltensor_fill_edges(vbx, velfab, mxlo, mylo, mzlo, mxhi, myhi, mzhi, bvxlo, bvylo, bvzlo, bvxhi, bvyhi, 
bvzhi, bct, bcl, inhomog, imaxorder, - dxinv, domain); - }); + dxinv, dlo, dhi); + } #endif } - // Notet that it is incorrect to call EnforcePeriodicity on vel. #endif } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLTensorOp_grad.cpp b/Src/LinearSolvers/MLMG/AMReX_MLTensorOp_grad.cpp index 705f38052d1..d395ecdac13 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLTensorOp_grad.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLTensorOp_grad.cpp @@ -16,9 +16,15 @@ MLTensorOp::compFlux (int amrlev, const Array& fluxes, const int ncomp = getNComp(); MLABecLaplacian::compFlux(amrlev, fluxes, sol, loc); - applyBCTensor(amrlev, mglev, sol, BCMode::Inhomogeneous, StateMode::Solution, m_bndry_sol[amrlev].get()); + MLMGBndry const* bndry = m_bndry_sol[amrlev].get(); + applyBCTensor(amrlev, mglev, sol, BCMode::Inhomogeneous, StateMode::Solution, bndry); + + const auto& bcondloc = *m_bcondloc[amrlev][mglev]; const auto dxinv = m_geom[amrlev][mglev].InvCellSizeArray(); + const Box& domain = m_geom[amrlev][mglev].growPeriodicDomain(1); + const auto dlo = amrex::lbound(domain); + const auto dhi = amrex::ubound(domain); Array const& etamf = m_b_coeffs[amrlev][mglev]; Array const& kapmf = m_kappa[amrlev][mglev]; @@ -52,20 +58,59 @@ MLTensorOp::compFlux (int amrlev, const Array& fluxes, Array4 const fyfab = fluxfab_tmp[1].array();, Array4 const fzfab = fluxfab_tmp[2].array();); - AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM - ( xbx, txbx, - { - mltensor_cross_terms_fx(txbx,fxfab,vfab,etaxfab,kapxfab,dxinv); - } - , ybx, tybx, - { - mltensor_cross_terms_fy(tybx,fyfab,vfab,etayfab,kapyfab,dxinv); - } - , zbx, tzbx, - { - mltensor_cross_terms_fz(tzbx,fzfab,vfab,etazfab,kapzfab,dxinv); - } - ); + if (domain.strictly_contains(mfi.tilebox())) { + AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM + ( xbx, txbx, + { + mltensor_cross_terms_fx(txbx,fxfab,vfab,etaxfab,kapxfab,dxinv); + } + , ybx, tybx, + { + mltensor_cross_terms_fy(tybx,fyfab,vfab,etayfab,kapyfab,dxinv); + } + , zbx, tzbx, + { + mltensor_cross_terms_fz(tzbx,fzfab,vfab,etazfab,kapzfab,dxinv); + } + ); + } else { + const auto & bdcv = bcondloc.bndryConds(mfi); + + Array2D bct; + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + for (OrientationIter face; face; ++face) { + Orientation ori = face(); + bct(ori,icomp) = bdcv[icomp][ori]; + } + } + + const auto& bvxlo = (*bndry)[Orientation(0,Orientation::low )].array(mfi); + const auto& bvylo = (*bndry)[Orientation(1,Orientation::low )].array(mfi); + const auto& bvxhi = (*bndry)[Orientation(0,Orientation::high)].array(mfi); + const auto& bvyhi = (*bndry)[Orientation(1,Orientation::high)].array(mfi); +#if (AMREX_SPACEDIM == 3) + const auto& bvzlo = (*bndry)[Orientation(2,Orientation::low )].array(mfi); + const auto& bvzhi = (*bndry)[Orientation(2,Orientation::high)].array(mfi); +#endif + + AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM + ( xbx, txbx, + { + mltensor_cross_terms_fx(txbx,fxfab,vfab,etaxfab,kapxfab,dxinv, + bvxlo, bvxhi, bct, dlo, dhi); + } + , ybx, tybx, + { + mltensor_cross_terms_fy(tybx,fyfab,vfab,etayfab,kapyfab,dxinv, + bvylo, bvyhi, bct, dlo, dhi); + } + , zbx, tzbx, + { + mltensor_cross_terms_fz(tzbx,fzfab,vfab,etazfab,kapzfab,dxinv, + bvzlo, bvzhi, bct, dlo, dhi); + } + ); + } for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { const Box& nbx = mfi.nodaltilebox(idim); @@ -95,33 +140,36 @@ MLTensorOp::compVelGrad (int amrlev, const Array& flux const int mglev = 0; - applyBCTensor(amrlev, mglev, sol, BCMode::Inhomogeneous, StateMode::Solution, m_bndry_sol[amrlev].get()); + MLMGBndry const* bndry = m_bndry_sol[amrlev].get(); + 
applyBC(amrlev, mglev, sol, BCMode::Inhomogeneous, StateMode::Solution, bndry); + applyBCTensor(amrlev, mglev, sol, BCMode::Inhomogeneous, StateMode::Solution, bndry); + + const auto& bcondloc = *m_bcondloc[amrlev][mglev]; const auto dxinv = m_geom[amrlev][mglev].InvCellSizeArray(); - const int dim_fluxes = AMREX_SPACEDIM*AMREX_SPACEDIM; + const Box& domain = m_geom[amrlev][mglev].growPeriodicDomain(1); + const auto dlo = amrex::lbound(domain); + const auto dhi = amrex::ubound(domain); #ifdef AMREX_USE_OMP #pragma omp parallel if (Gpu::notInLaunchRegion()) #endif + for (MFIter mfi(sol, TilingIfNotGPU()); mfi.isValid(); ++mfi) { - Array fluxfab_tmp; + Array4 const vfab = sol.const_array(mfi); + AMREX_D_TERM(Box const xbx = mfi.nodaltilebox(0);, + Box const ybx = mfi.nodaltilebox(1);, + Box const zbx = mfi.nodaltilebox(2);) + AMREX_D_TERM(Array4 const fxfab = fluxes[0]->array(mfi);, + Array4 const fyfab = fluxes[1]->array(mfi);, + Array4 const fzfab = fluxes[2]->array(mfi);) - for (MFIter mfi(sol, TilingIfNotGPU()); mfi.isValid(); ++mfi) - { - Array4 const vfab = sol.const_array(mfi); - AMREX_D_TERM(Box const xbx = mfi.nodaltilebox(0);, - Box const ybx = mfi.nodaltilebox(1);, - Box const zbx = mfi.nodaltilebox(2);); - AMREX_D_TERM(fluxfab_tmp[0].resize(xbx,dim_fluxes);, - fluxfab_tmp[1].resize(ybx,dim_fluxes);, - fluxfab_tmp[2].resize(zbx,dim_fluxes);); - AMREX_D_TERM(Elixir fxeli = fluxfab_tmp[0].elixir();, - Elixir fyeli = fluxfab_tmp[1].elixir();, - Elixir fzeli = fluxfab_tmp[2].elixir();); - AMREX_D_TERM(Array4 const fxfab = fluxfab_tmp[0].array();, - Array4 const fyfab = fluxfab_tmp[1].array();, - Array4 const fzfab = fluxfab_tmp[2].array();); +// The derivatives are put in the array in the following order: +// component: 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 +// in 2D: dU/dx, dV/dx, dU/dy, dV/dy +// in 3D: dU/dx, dV/dx, dW/dx, dU/dy, dV/dy, dW/dy, dU/dz, dV/dz, dW/dz + if (domain.strictly_contains(mfi.tilebox())) { AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM ( xbx, txbx, { @@ -136,23 +184,39 @@ MLTensorOp::compVelGrad (int amrlev, const Array& flux mltensor_vel_grads_fz(tzbx,fzfab,vfab,dxinv); } ); - -// The derivatives are put in the array with the following order: -// component: 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 -// in 2D: dU/dx, dV/dx, dU/dy, dV/dy -// in 3D: dU/dx, dV/dx, dW/dx, dU/dy, dV/dy, dW/dy, dU/dz, dV/dz, dW/dz - - - for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { - const Box& nbx = mfi.nodaltilebox(idim); - Array4 dst = fluxes[idim]->array(mfi); - Array4 src = fluxfab_tmp[idim].const_array(); - AMREX_HOST_DEVICE_PARALLEL_FOR_4D (nbx, dim_fluxes, i, j, k, n, - { - dst(i,j,k,n) = src(i,j,k,n); - }); + } else { + const auto & bdcv = bcondloc.bndryConds(mfi); + + Array2D bct; + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + for (OrientationIter face; face; ++face) { + Orientation ori = face(); + bct(ori,icomp) = bdcv[icomp][ori]; + } } + const auto& bvxlo = (*bndry)[Orientation(0,Orientation::low )].array(mfi); + const auto& bvylo = (*bndry)[Orientation(1,Orientation::low )].array(mfi); + const auto& bvxhi = (*bndry)[Orientation(0,Orientation::high)].array(mfi); + const auto& bvyhi = (*bndry)[Orientation(1,Orientation::high)].array(mfi); +#if (AMREX_SPACEDIM == 3) + const auto& bvzlo = (*bndry)[Orientation(2,Orientation::low )].array(mfi); + const auto& bvzhi = (*bndry)[Orientation(2,Orientation::high)].array(mfi); +#endif + AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM + ( xbx, txbx, + { + mltensor_vel_grads_fx(txbx,fxfab,vfab,dxinv,bvxlo,bvxhi,bct,dlo,dhi); + } + , ybx, tybx, + {
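+                // Note (hedged reading of this patch): near the domain
+                // boundary the gradient kernel is given the boundary values
+                // (bvylo/bvyhi), the BC types in bct, and the domain bounds
+                // (dlo/dhi), so it can use the prescribed boundary data at
+                // domain faces instead of reading ghost cells.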
+ mltensor_vel_grads_fy(tybx,fyfab,vfab,dxinv,bvylo,bvyhi,bct,dlo,dhi); + } + , zbx, tzbx, + { + mltensor_vel_grads_fz(tzbx,fzfab,vfab,dxinv,bvzlo,bvzhi,bct,dlo,dhi); + } + ); } } #endif diff --git a/Src/LinearSolvers/MLMG/AMReX_MLTensor_2D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLTensor_2D_K.H index 8f10f08ec58..a40fa4611a8 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLTensor_2D_K.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLTensor_2D_K.H @@ -17,110 +17,168 @@ void mltensor_fill_corners (int icorner, Box const& vbox, // vbox: the valid box Array4 const& bcvalylo, Array4 const& bcvalxhi, Array4 const& bcvalyhi, - GpuArray const& bct, - GpuArray const& bcl, + Array2D const& bct, + Array2D const& bcl, int inhomog, int maxorder, - GpuArray const& dxinv, Box const& domain) noexcept + GpuArray const& dxinv, + Dim3 const& dlo, Dim3 const& dhi) noexcept { - constexpr int oxlo = 0; - constexpr int oylo = 1; - constexpr int oxhi = 2; - constexpr int oyhi = 3; - constexpr int xdir = 0; - constexpr int ydir = 1; + constexpr int k = 0; const auto blen = amrex::length(vbox); const auto vlo = amrex::lbound(vbox); const auto vhi = amrex::ubound(vbox); - const auto dlo = amrex::lbound(domain); - const auto dhi = amrex::ubound(domain); - for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { - switch (icorner) { - case 0: { - // xlo & ylo - if (mxlo(vlo.x-1,vlo.y-1,0) != BndryData::covered) { - Box bx = amrex::adjCellLo(amrex::adjCellLo(vbox,xdir,1),ydir,1); - if (vlo.x == dlo.x && vlo.y == dlo.y) { - vel(vlo.x-1,vlo.y-1,0,icomp) = vel(vlo.x-1,vlo.y,0,icomp) - + vel(vlo.x,vlo.y-1,0,icomp) - vel(vlo.x,vlo.y,0,icomp); - } else if (vlo.x == dlo.x || mylo(vlo.x,vlo.y-1,0) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxlo; - mllinop_apply_bc_x(Orientation::low, bx, blen.x, - vel, mxlo, bct[offset+icomp], bcl[offset+icomp], - bcvalxlo, maxorder, dxinv[xdir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * oylo; - mllinop_apply_bc_y(Orientation::low, bx, blen.y, - vel, mylo, bct[offset+icomp], bcl[offset+icomp], - bcvalylo, maxorder, dxinv[ydir], inhomog, icomp); + if (icorner == 0) { // xlo & ylo + int const i = vlo.x-1; + int const j = vlo.y-1; + if (mxlo(i,j,k) != BndryData::covered && (dlo.x != vlo.x || dlo.y != vlo.y)) { + bool x_interior = mylo(i+1,j ,k) == BndryData::covered; // i+1,j is a valid cell inside domain + bool x_exterior = mylo(i+1,j ,k) == BndryData::not_covered; // i+1,j is a ghost cell inside domain + bool y_interior = mxlo(i ,j+1,k) == BndryData::covered; + bool y_exterior = mxlo(i ,j+1,k) == BndryData::not_covered; + if ((x_interior && y_interior) || (x_exterior && y_exterior)) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } + } else if (x_interior || dlo.x == vlo.x) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + } + } else if (y_interior || dlo.y == vlo.y) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + 
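+                // Only the ylo boundary condition is applied at this corner:
+                // per the masks above, (i,j+1) is a valid cell inside the
+                // domain (or the corner lies on the y domain face), so the
+                // ghost value is obtained by extrapolation in y alone.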
mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); } } - break; } - case 1: { - // xhi & ylo - if (mxhi(vhi.x+1,vlo.y-1,0) != BndryData::covered) { - Box bx = amrex::adjCellLo(amrex::adjCellHi(vbox,xdir,1),ydir,1); - if (vhi.x == dhi.x && vlo.y == dlo.y) { - vel(vhi.x+1,vlo.y-1,0,icomp) = vel(vhi.x+1,vlo.y,0,icomp) - + vel(vhi.x,vlo.y-1,0,icomp) - vel(vhi.x,vlo.y,0,icomp); - } else if (vhi.x == dhi.x || mylo(vhi.x,vlo.y-1,0) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxhi; - mllinop_apply_bc_x(Orientation::high, bx, blen.x, - vel, mxhi, bct[offset+icomp], bcl[offset+icomp], - bcvalxhi, maxorder, dxinv[xdir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * oylo; - mllinop_apply_bc_y(Orientation::low, bx, blen.y, - vel, mylo, bct[offset+icomp], bcl[offset+icomp], - bcvalylo, maxorder, dxinv[ydir], inhomog, icomp); + } else if (icorner == 1) { // xhi & ylo + int const i = vhi.x+1; + int const j = vlo.y-1; + if (mxhi(i,j,k) != BndryData::covered && (dhi.x != vhi.x || dlo.y != vlo.y)) { + bool x_interior = mylo(i-1,j ,k) == BndryData::covered; + bool x_exterior = mylo(i-1,j ,k) == BndryData::not_covered; + bool y_interior = mxhi(i ,j+1,k) == BndryData::covered; + bool y_exterior = mxhi(i ,j+1,k) == BndryData::not_covered; + if ((x_interior && y_interior) || (x_exterior && y_exterior)) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } + } else if (x_interior || dhi.x == vhi.x) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + } + } else if (y_interior || dlo.y == vlo.y) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); } } - break; } - case 2: { - // xlo & yhi - if (mxlo(vlo.x-1,vhi.y+1,0) != BndryData::covered) { - Box bx = amrex::adjCellHi(amrex::adjCellLo(vbox,xdir,1),ydir,1); - if (vlo.x == dlo.x && vhi.y == dhi.y) { - vel(vlo.x-1,vhi.y+1,0,icomp) = vel(vlo.x-1,vhi.y,0,icomp) - + vel(vlo.x,vhi.y+1,0,icomp) - vel(vlo.x,vhi.y,0,icomp); - } else if (vlo.x == dlo.x || myhi(vlo.x,vhi.y+1,0) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxlo; - mllinop_apply_bc_x(Orientation::low, bx, blen.x, - vel, mxlo, bct[offset+icomp], bcl[offset+icomp], - bcvalxlo, maxorder, dxinv[xdir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * oyhi; - mllinop_apply_bc_y(Orientation::high, bx, blen.y, - vel, myhi, bct[offset+icomp], bcl[offset+icomp], - bcvalyhi, maxorder, dxinv[ydir], inhomog, icomp); + } else if (icorner == 2) { // xlo & yhi + int const i = vlo.x-1; + int const j = vhi.y+1; + if (mxlo(i,j,k) != BndryData::covered && (dlo.x != vlo.x || dhi.y != vhi.y)) { + bool x_interior = myhi(i+1,j ,k) == 
BndryData::covered; + bool x_exterior = myhi(i+1,j ,k) == BndryData::not_covered; + bool y_interior = mxlo(i ,j-1,k) == BndryData::covered; + bool y_exterior = mxlo(i ,j-1,k) == BndryData::not_covered; + if ((x_interior && y_interior) || (x_exterior && y_exterior)) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } + } else if (x_interior || dlo.x == vlo.x) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + } + } else if (y_interior || dhi.y == vhi.y) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); } } - break; } - case 3: { - // xhi & yhi - if (mxhi(vhi.x+1,vhi.y+1,0) != BndryData::covered) { - Box bx = amrex::adjCellHi(amrex::adjCellHi(vbox,xdir,1),ydir,1); - if (vhi.x == dhi.x && vhi.y == dhi.y) { - vel(vhi.x+1,vhi.y+1,0,icomp) = vel(vhi.x+1,vhi.y,0,icomp) - + vel(vhi.x,vhi.y+1,0,icomp) - vel(vhi.x,vhi.y,0,icomp); - } else if (vhi.x == dhi.x || myhi(vhi.x,vhi.y+1,0) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxhi; - mllinop_apply_bc_x(Orientation::high, bx, blen.x, - vel, mxhi, bct[offset+icomp], bcl[offset+icomp], - bcvalxhi, maxorder, dxinv[xdir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * oyhi; - mllinop_apply_bc_y(Orientation::high, bx, blen.y, - vel, myhi, bct[offset+icomp], bcl[offset+icomp], - bcvalyhi, maxorder, dxinv[ydir], inhomog, icomp); + } else if (icorner == 3) { // xhi & yhi + int const i = vhi.x+1; + int const j = vhi.y+1; + if (mxhi(i,j,k) != BndryData::covered && (dhi.x != vhi.x || dhi.y != vhi.y)) { + bool x_interior = myhi(i-1,j ,k) == BndryData::covered; + bool x_exterior = myhi(i-1,j ,k) == BndryData::not_covered; + bool y_interior = mxhi(i ,j-1,k) == BndryData::covered; + bool y_exterior = mxhi(i ,j-1,k) == BndryData::not_covered; + if ((x_interior && y_interior) || (x_exterior && y_exterior)) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } + } else if (x_interior || dhi.x == vhi.x) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + } + } else if (y_interior || dhi.y == vhi.y) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + 
mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); } } - break; - } - default: {} } } } @@ -137,11 +195,12 @@ void mltensor_cross_terms_fx (Box const& box, Array4 const& fx, const auto hi = amrex::ubound(box); constexpr Real twoThirds = Real(2./3.); + int k = 0; for (int j = lo.y; j <= hi.y; ++j) { AMREX_PRAGMA_SIMD for (int i = lo.x; i <= hi.x; ++i) { - Real dudy = (vel(i,j+1,0,0)+vel(i-1,j+1,0,0)-vel(i,j-1,0,0)-vel(i-1,j-1,0,0))*(Real(0.25)*dyi); - Real dvdy = (vel(i,j+1,0,1)+vel(i-1,j+1,0,1)-vel(i,j-1,0,1)-vel(i-1,j-1,0,1))*(Real(0.25)*dyi); + Real dudy = mltensor_dy_on_xface(i,j,k,0,vel,dyi); + Real dvdy = mltensor_dy_on_xface(i,j,k,1,vel,dyi); Real divu = dvdy; Real xif = kapx(i,j,0); Real mun = Real(0.75)*(etax(i,j,0,0)-xif); // restore the original eta @@ -164,11 +223,80 @@ void mltensor_cross_terms_fy (Box const& box, Array4 const& fy, const auto hi = amrex::ubound(box); constexpr Real twoThirds = Real(2./3.); + int k = 0; for (int j = lo.y; j <= hi.y; ++j) { AMREX_PRAGMA_SIMD for (int i = lo.x; i <= hi.x; ++i) { - Real dudx = (vel(i+1,j,0,0)+vel(i+1,j-1,0,0)-vel(i-1,j,0,0)-vel(i-1,j-1,0,0))*(Real(0.25)*dxi); - Real dvdx = (vel(i+1,j,0,1)+vel(i+1,j-1,0,1)-vel(i-1,j,0,1)-vel(i-1,j-1,0,1))*(Real(0.25)*dxi); + Real dudx = mltensor_dx_on_yface(i,j,k,0,vel,dxi); + Real dvdx = mltensor_dx_on_yface(i,j,k,1,vel,dxi); + Real divu = dudx; + Real xif = kapy(i,j,0); + Real mun = Real(0.75)*(etay(i,j,0,1)-xif); // restore the original eta + Real mut = etay(i,j,0,0); + fy(i,j,0,0) = -mut*dvdx; + fy(i,j,0,1) = -mun*(-twoThirds*divu) - xif*divu; + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_cross_terms_fx (Box const& box, Array4 const& fx, + Array4 const& vel, + Array4 const& etax, + Array4 const& kapx, + GpuArray const& dxinv, + Array4 const& bvxlo, + Array4 const& bvxhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept +{ + const Real dyi = dxinv[1]; + const auto lo = amrex::lbound(box); + const auto hi = amrex::ubound(box); + constexpr Real twoThirds = Real(2./3.); + + // Three BC types: reflect odd, neumann, and dirichlet + + int k = 0; + for (int j = lo.y; j <= hi.y; ++j) { + for (int i = lo.x; i <= hi.x; ++i) { + Real dudy = mltensor_dy_on_xface(i,j,k,0,vel,dyi,bvxlo,bvxhi,bct,dlo,dhi); + Real dvdy = mltensor_dy_on_xface(i,j,k,1,vel,dyi,bvxlo,bvxhi,bct,dlo,dhi); + Real divu = dvdy; + Real xif = kapx(i,j,0); + Real mun = Real(0.75)*(etax(i,j,0,0)-xif); // restore the original eta + Real mut = etax(i,j,0,1); + fx(i,j,0,0) = -mun*(-twoThirds*divu) - xif*divu; + fx(i,j,0,1) = -mut*dudy; + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_cross_terms_fy (Box const& box, Array4 const& fy, + Array4 const& vel, + Array4 const& etay, + Array4 const& kapy, + GpuArray const& dxinv, + Array4 const& bvylo, + Array4 const& bvyhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept +{ + const Real dxi = dxinv[0]; + const auto lo = amrex::lbound(box); + const auto hi = amrex::ubound(box); + constexpr Real twoThirds = Real(2./3.); + + int k = 0; + for (int j = lo.y; j <= hi.y; ++j) { + for (int i = lo.x; i <= hi.x; ++i) { + Real dudx = mltensor_dx_on_yface(i,j,k,0,vel,dxi,bvylo,bvyhi,bct,dlo,dhi); + Real dvdx = mltensor_dx_on_yface(i,j,k,1,vel,dxi,bvylo,bvyhi,bct,dlo,dhi); Real divu = dudx; Real xif = kapy(i,j,0); Real mun = Real(0.75)*(etay(i,j,0,1)-xif); // restore the original eta @@ -241,13 +369,14 @@ 
void mltensor_vel_grads_fx (Box const& box, Array4 const& fx, const auto lo = amrex::lbound(box); const auto hi = amrex::ubound(box); + int k = 0; for (int j = lo.y; j <= hi.y; ++j) { AMREX_PRAGMA_SIMD for (int i = lo.x; i <= hi.x; ++i) { Real dudx = (vel(i,j,0,0) - vel(i-1,j,0,0))*dxi; Real dvdx = (vel(i,j,0,1) - vel(i-1,j,0,1))*dxi; - Real dudy = (vel(i,j+1,0,0)+vel(i-1,j+1,0,0)-vel(i,j-1,0,0)-vel(i-1,j-1,0,0))*(Real(0.25)*dyi); - Real dvdy = (vel(i,j+1,0,1)+vel(i-1,j+1,0,1)-vel(i,j-1,0,1)-vel(i-1,j-1,0,1))*(Real(0.25)*dyi); + Real dudy = mltensor_dy_on_xface(i,j,k,0,vel,dyi); + Real dvdy = mltensor_dy_on_xface(i,j,k,1,vel,dyi); fx(i,j,0,0) = dudx; fx(i,j,0,1) = dvdx; fx(i,j,0,2) = dudy; @@ -266,11 +395,74 @@ void mltensor_vel_grads_fy (Box const& box, Array4 const& fy, const auto lo = amrex::lbound(box); const auto hi = amrex::ubound(box); + int k = 0; for (int j = lo.y; j <= hi.y; ++j) { AMREX_PRAGMA_SIMD for (int i = lo.x; i <= hi.x; ++i) { - Real dudx = (vel(i+1,j,0,0)+vel(i+1,j-1,0,0)-vel(i-1,j,0,0)-vel(i-1,j-1,0,0))*(Real(0.25)*dxi); - Real dvdx = (vel(i+1,j,0,1)+vel(i+1,j-1,0,1)-vel(i-1,j,0,1)-vel(i-1,j-1,0,1))*(Real(0.25)*dxi); + Real dudx = mltensor_dx_on_yface(i,j,k,0,vel,dxi); + Real dvdx = mltensor_dx_on_yface(i,j,k,1,vel,dxi); + Real dudy = (vel(i,j,0,0) - vel(i,j-1,0,0))*dyi; + Real dvdy = (vel(i,j,0,1) - vel(i,j-1,0,1))*dyi; + fy(i,j,0,0) = dudx; + fy(i,j,0,1) = dvdx; + fy(i,j,0,2) = dudy; + fy(i,j,0,3) = dvdy; + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_vel_grads_fx (Box const& box, Array4 const& fx, + Array4 const& vel, + GpuArray const& dxinv, + Array4 const& bvxlo, + Array4 const& bvxhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept +{ + const Real dxi = dxinv[0]; + const Real dyi = dxinv[1]; + const auto lo = amrex::lbound(box); + const auto hi = amrex::ubound(box); + + int k = 0; + for (int j = lo.y; j <= hi.y; ++j) { + for (int i = lo.x; i <= hi.x; ++i) { + Real dudx = (vel(i,j,0,0) - vel(i-1,j,0,0))*dxi; + Real dvdx = (vel(i,j,0,1) - vel(i-1,j,0,1))*dxi; + Real dudy = mltensor_dy_on_xface(i,j,k,0,vel,dyi,bvxlo,bvxhi,bct,dlo,dhi); + Real dvdy = mltensor_dy_on_xface(i,j,k,1,vel,dyi,bvxlo,bvxhi,bct,dlo,dhi); + fx(i,j,0,0) = dudx; + fx(i,j,0,1) = dvdx; + fx(i,j,0,2) = dudy; + fx(i,j,0,3) = dvdy; + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_vel_grads_fy (Box const& box, Array4 const& fy, + Array4 const& vel, + GpuArray const& dxinv, + Array4 const& bvylo, + Array4 const& bvyhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept +{ + const Real dxi = dxinv[0]; + const Real dyi = dxinv[1]; + const auto lo = amrex::lbound(box); + const auto hi = amrex::ubound(box); + + int k = 0; + for (int j = lo.y; j <= hi.y; ++j) { + for (int i = lo.x; i <= hi.x; ++i) { + Real dudx = mltensor_dx_on_yface(i,j,k,0,vel,dxi,bvylo,bvyhi,bct,dlo,dhi); + Real dvdx = mltensor_dx_on_yface(i,j,k,1,vel,dxi,bvylo,bvyhi,bct,dlo,dhi); Real dudy = (vel(i,j,0,0) - vel(i,j-1,0,0))*dyi; Real dvdy = (vel(i,j,0,1) - vel(i,j-1,0,1))*dyi; fy(i,j,0,0) = dudx; diff --git a/Src/LinearSolvers/MLMG/AMReX_MLTensor_3D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLTensor_3D_K.H index a4a4c7df9ef..a5de05a385e 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLTensor_3D_K.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLTensor_3D_K.H @@ -6,6 +6,643 @@ namespace amrex { +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_fill_edges_xlo_ylo (int const i, int const j, int const k, Dim3 const& blen, + Array4 const& vel, + Array4 const& mxlo, + Array4 
const& mylo, + Array4 const& bcvalxlo, + Array4 const& bcvalylo, + Array2D const& bct, + Array2D const& bcl, + int inhomog, int maxorder, + GpuArray const& dxinv, + bool xlo_domain, bool ylo_domain) noexcept +{ + if (mxlo(i,j,k) != BndryData::covered && (!xlo_domain || !ylo_domain)) { + bool x_interior = mylo(i+1,j ,k) == BndryData::covered; + bool x_exterior = mylo(i+1,j ,k) == BndryData::not_covered; + bool y_interior = mxlo(i ,j+1,k) == BndryData::covered; + bool y_exterior = mxlo(i ,j+1,k) == BndryData::not_covered; + if ((x_interior && y_interior) || (x_exterior && y_exterior)) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } + } else if (x_interior || xlo_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + } + } else if (y_interior || ylo_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + } + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_fill_edges_xhi_ylo (int const i, int const j, int const k, Dim3 const& blen, + Array4 const& vel, + Array4 const& mxhi, + Array4 const& mylo, + Array4 const& bcvalxhi, + Array4 const& bcvalylo, + Array2D const& bct, + Array2D const& bcl, + int inhomog, int maxorder, + GpuArray const& dxinv, + bool xhi_domain, bool ylo_domain) noexcept +{ + if (mxhi(i,j,k) != BndryData::covered && (!xhi_domain || !ylo_domain)) { + bool x_interior = mylo(i-1,j ,k) == BndryData::covered; + bool x_exterior = mylo(i-1,j ,k) == BndryData::not_covered; + bool y_interior = mxhi(i ,j+1,k) == BndryData::covered; + bool y_exterior = mxhi(i ,j+1,k) == BndryData::not_covered; + if ((x_interior && y_interior) || (x_exterior && y_exterior)) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } + } else if (x_interior || xhi_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + } + } else if (y_interior || ylo_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + } + } + } +} + 
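
All twelve mltensor_fill_edges_* helpers in this header follow the same decision rule, specialized per edge: skip the ghost cell when its own mask says it is covered by valid data; fill it from one face's one-sided mllinop_apply_bc_* extrapolation when only that direction qualifies (its mask probe one cell inward is interior, or the valid box touches that domain face); and when both directions qualify equally (both probes interior, or both exterior), apply both extrapolations independently and average them. The sketch below is a minimal, self-contained restatement of that rule for a single ghost value; fill_from_x, fill_from_y, and Mask are hypothetical stand-ins, not AMReX API.

// A standalone sketch (hypothetical names, not AMReX code) of the edge-fill
// rule: one ghost value g at the meeting of an x face and a y face.
#include <cstdio>

enum class Mask { covered, not_covered };

// Stand-ins for the one-sided mllinop_apply_bc_x/_y extrapolations.
double fill_from_x (double g) { return 2.0 - g; } // pretend Dirichlet u = 1 on the x face
double fill_from_y (double g) { return g; }       // pretend homogeneous Neumann on the y face

// x_probe/y_probe play the role of the mask probes one cell inward;
// x_domain/y_domain say whether the valid box touches that domain face.
double fill_edge_ghost (double g, Mask x_probe, Mask y_probe,
                        bool x_domain, bool y_domain)
{
    bool x_interior = (x_probe == Mask::covered);
    bool x_exterior = (x_probe == Mask::not_covered);
    bool y_interior = (y_probe == Mask::covered);
    bool y_exterior = (y_probe == Mask::not_covered);
    if ((x_interior && y_interior) || (x_exterior && y_exterior)) {
        // Both one-sided fills are equally valid; averaging mirrors
        // vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)) above and makes
        // the result independent of which face is handled first.
        return 0.5*(fill_from_x(g) + fill_from_y(g));
    } else if (x_interior || x_domain) {
        return fill_from_x(g);    // only the x-face BC applies
    } else if (y_interior || y_domain) {
        return fill_from_y(g);    // only the y-face BC applies
    }
    return g;                     // leave the ghost cell untouched
}

int main ()
{
    // Both probes exterior (e.g. a box corner away from the domain
    // boundary): the two one-sided fills are averaged.
    std::printf("%g\n", fill_edge_ghost(0.5, Mask::not_covered, Mask::not_covered,
                                        false, false));
}

Note also the gate at the top of each helper: nothing is done when the ghost cell is already covered by valid data, and edges where the box touches both domain faces at once are excluded here.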
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_fill_edges_xlo_yhi (int const i, int const j, int const k, Dim3 const& blen, + Array4 const& vel, + Array4 const& mxlo, + Array4 const& myhi, + Array4 const& bcvalxlo, + Array4 const& bcvalyhi, + Array2D const& bct, + Array2D const& bcl, + int inhomog, int maxorder, + GpuArray const& dxinv, + bool xlo_domain, bool yhi_domain) noexcept +{ + if (mxlo(i,j,k) != BndryData::covered && (!xlo_domain || !yhi_domain)) { + bool x_interior = myhi(i+1,j ,k) == BndryData::covered; + bool x_exterior = myhi(i+1,j ,k) == BndryData::not_covered; + bool y_interior = mxlo(i ,j-1,k) == BndryData::covered; + bool y_exterior = mxlo(i ,j-1,k) == BndryData::not_covered; + if ((x_interior && y_interior) || (x_exterior && y_exterior)) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } + } else if (x_interior || xlo_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + } + } else if (y_interior || yhi_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + } + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_fill_edges_xhi_yhi (int const i, int const j, int const k, Dim3 const& blen, + Array4 const& vel, + Array4 const& mxhi, + Array4 const& myhi, + Array4 const& bcvalxhi, + Array4 const& bcvalyhi, + Array2D const& bct, + Array2D const& bcl, + int inhomog, int maxorder, + GpuArray const& dxinv, + bool xhi_domain, bool yhi_domain) noexcept +{ + if (mxhi(i,j,k) != BndryData::covered && (!xhi_domain || !yhi_domain)) { + bool x_interior = myhi(i-1,j ,k) == BndryData::covered; + bool x_exterior = myhi(i-1,j ,k) == BndryData::not_covered; + bool y_interior = mxhi(i ,j-1,k) == BndryData::covered; + bool y_exterior = mxhi(i ,j-1,k) == BndryData::not_covered; + if ((x_interior && y_interior) || (x_exterior && y_exterior)) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } + } else if (x_interior || xhi_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + } + } else if (y_interior || yhi_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + 
mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + } + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_fill_edges_xlo_zlo (int const i, int const j, int const k, Dim3 const& blen, + Array4 const& vel, + Array4 const& mxlo, + Array4 const& mzlo, + Array4 const& bcvalxlo, + Array4 const& bcvalzlo, + Array2D const& bct, + Array2D const& bcl, + int inhomog, int maxorder, + GpuArray const& dxinv, + bool xlo_domain, bool zlo_domain) noexcept +{ + if (mxlo(i,j,k) != BndryData::covered && (!xlo_domain || !zlo_domain)) { + bool x_interior = mzlo(i+1,j,k ) == BndryData::covered; + bool x_exterior = mzlo(i+1,j,k ) == BndryData::not_covered; + bool z_interior = mxlo(i ,j,k+1) == BndryData::covered; + bool z_exterior = mxlo(i ,j,k+1) == BndryData::not_covered; + if ((x_interior && z_interior) || (x_exterior && z_exterior)) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } + } else if (x_interior || xlo_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + } + } else if (z_interior || zlo_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + } + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_fill_edges_xhi_zlo (int const i, int const j, int const k, Dim3 const& blen, + Array4 const& vel, + Array4 const& mxhi, + Array4 const& mzlo, + Array4 const& bcvalxhi, + Array4 const& bcvalzlo, + Array2D const& bct, + Array2D const& bcl, + int inhomog, int maxorder, + GpuArray const& dxinv, + bool xhi_domain, bool zlo_domain) noexcept +{ + if (mxhi(i,j,k) != BndryData::covered && (!xhi_domain || !zlo_domain)) { + bool x_interior = mzlo(i-1,j,k ) == BndryData::covered; + bool x_exterior = mzlo(i-1,j,k ) == BndryData::not_covered; + bool z_interior = mxhi(i ,j,k+1) == BndryData::covered; + bool z_exterior = mxhi(i ,j,k+1) == BndryData::not_covered; + if ((x_interior && z_interior) || (x_exterior && z_exterior)) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } + } else if (x_interior || xhi_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + 
bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + } + } else if (z_interior || zlo_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + } + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_fill_edges_xlo_zhi (int const i, int const j, int const k, Dim3 const& blen, + Array4 const& vel, + Array4 const& mxlo, + Array4 const& mzhi, + Array4 const& bcvalxlo, + Array4 const& bcvalzhi, + Array2D const& bct, + Array2D const& bcl, + int inhomog, int maxorder, + GpuArray const& dxinv, + bool xlo_domain, bool zhi_domain) noexcept +{ + if (mxlo(i,j,k) != BndryData::covered && (!xlo_domain || !zhi_domain)) { + bool x_interior = mzhi(i+1,j,k ) == BndryData::covered; + bool x_exterior = mzhi(i+1,j,k ) == BndryData::not_covered; + bool z_interior = mxlo(i ,j,k-1) == BndryData::covered; + bool z_exterior = mxlo(i ,j,k-1) == BndryData::not_covered; + if ((x_interior && z_interior) || (x_exterior && z_exterior)) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } + } else if (x_interior || xlo_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + } + } else if (z_interior || zhi_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + } + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_fill_edges_xhi_zhi (int const i, int const j, int const k, Dim3 const& blen, + Array4 const& vel, + Array4 const& mxhi, + Array4 const& mzhi, + Array4 const& bcvalxhi, + Array4 const& bcvalzhi, + Array2D const& bct, + Array2D const& bcl, + int inhomog, int maxorder, + GpuArray const& dxinv, + bool xhi_domain, bool zhi_domain) noexcept +{ + if (mxhi(i,j,k) != BndryData::covered && (!xhi_domain || !zhi_domain)) { + bool x_interior = mzhi(i-1,j,k ) == BndryData::covered; + bool x_exterior = mzhi(i-1,j,k ) == BndryData::not_covered; + bool z_interior = mxhi(i ,j,k-1) == BndryData::covered; + bool z_exterior = mxhi(i ,j,k-1) == BndryData::not_covered; + if ((x_interior && z_interior) || (x_exterior && z_exterior)) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } + } else if 
(x_interior || xhi_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + } + } else if (z_interior || zhi_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + } + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_fill_edges_ylo_zlo (int const i, int const j, int const k, Dim3 const& blen, + Array4 const& vel, + Array4 const& mylo, + Array4 const& mzlo, + Array4 const& bcvalylo, + Array4 const& bcvalzlo, + Array2D const& bct, + Array2D const& bcl, + int inhomog, int maxorder, + GpuArray const& dxinv, + bool ylo_domain, bool zlo_domain) noexcept +{ + if (mylo(i,j,k) != BndryData::covered && (!ylo_domain || !zlo_domain)) { + bool y_interior = mzlo(i,j+1,k ) == BndryData::covered; + bool y_exterior = mzlo(i,j+1,k ) == BndryData::not_covered; + bool z_interior = mylo(i,j ,k+1) == BndryData::covered; + bool z_exterior = mylo(i,j ,k+1) == BndryData::not_covered; + if ((y_interior && z_interior) || (y_exterior && z_exterior)) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } + } else if (y_interior || ylo_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + } + } else if (z_interior || zlo_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + } + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_fill_edges_yhi_zlo (int const i, int const j, int const k, Dim3 const& blen, + Array4 const& vel, + Array4 const& myhi, + Array4 const& mzlo, + Array4 const& bcvalyhi, + Array4 const& bcvalzlo, + Array2D const& bct, + Array2D const& bcl, + int inhomog, int maxorder, + GpuArray const& dxinv, + bool yhi_domain, bool zlo_domain) noexcept +{ + if (myhi(i,j,k) != BndryData::covered && (!yhi_domain || !zlo_domain)) { + bool y_interior = mzlo(i,j-1,k ) == BndryData::covered; + bool y_exterior = mzlo(i,j-1,k ) == BndryData::not_covered; + bool z_interior = myhi(i,j ,k+1) == BndryData::covered; + bool z_exterior = myhi(i,j ,k+1) == BndryData::not_covered; + if ((y_interior && z_interior) || (y_exterior && z_exterior)) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + 
bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } + } else if (y_interior || yhi_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + } + } else if (z_interior || zlo_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + } + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_fill_edges_ylo_zhi (int const i, int const j, int const k, Dim3 const& blen, + Array4 const& vel, + Array4 const& mylo, + Array4 const& mzhi, + Array4 const& bcvalylo, + Array4 const& bcvalzhi, + Array2D const& bct, + Array2D const& bcl, + int inhomog, int maxorder, + GpuArray const& dxinv, + bool ylo_domain, bool zhi_domain) noexcept +{ + if (mylo(i,j,k) != BndryData::covered && (!ylo_domain || !zhi_domain)) { + bool y_interior = mzhi(i,j+1,k ) == BndryData::covered; + bool y_exterior = mzhi(i,j+1,k ) == BndryData::not_covered; + bool z_interior = mylo(i,j ,k-1) == BndryData::covered; + bool z_exterior = mylo(i,j ,k-1) == BndryData::not_covered; + if ((y_interior && z_interior) || (y_exterior && z_exterior)) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } + } else if (y_interior || ylo_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + } + } else if (z_interior || zhi_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + } + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_fill_edges_yhi_zhi (int const i, int const j, int const k, Dim3 const& blen, + Array4 const& vel, + Array4 const& myhi, + Array4 const& mzhi, + Array4 const& bcvalyhi, + Array4 const& bcvalzhi, + Array2D const& bct, + Array2D const& bcl, + int inhomog, int maxorder, + GpuArray const& dxinv, + bool yhi_domain, bool zhi_domain) noexcept +{ + if (myhi(i,j,k) != BndryData::covered && (!yhi_domain || !zhi_domain)) { + bool y_interior = mzhi(i,j-1,k ) == BndryData::covered; + bool y_exterior = mzhi(i,j-1,k ) == BndryData::not_covered; + bool z_interior = myhi(i,j ,k-1) == BndryData::covered; + bool z_exterior = myhi(i,j ,k-1) == BndryData::not_covered; + if ((y_interior && z_interior) || (y_exterior && z_exterior)) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), 
icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } + } else if (y_interior || yhi_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + } + } else if (z_interior || zhi_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + } + } + } +} + +#ifdef AMREX_USE_EB AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mltensor_fill_corners (int icorner, Box const& vbox, // vbox: the valid box Array4 const& vel, @@ -21,495 +658,680 @@ void mltensor_fill_corners (int icorner, Box const& vbox, // vbox: the valid box Array4 const& bcvalxhi, Array4 const& bcvalyhi, Array4 const& bcvalzhi, - GpuArray const& bct, - GpuArray const& bcl, + Array2D const& bct, + Array2D const& bcl, int inhomog, int maxorder, - GpuArray const& dxinv, Box const& domain) noexcept + GpuArray const& dxinv, + Dim3 const& dlo, Dim3 const& dhi) noexcept { - constexpr int oxlo = 0; - constexpr int oylo = 1; - constexpr int ozlo = 2; - constexpr int oxhi = 3; - constexpr int oyhi = 4; - constexpr int ozhi = 5; - constexpr int xdir = 0; - constexpr int ydir = 1; - constexpr int zdir = 2; const auto blen = amrex::length(vbox); const auto vlo = amrex::lbound(vbox); const auto vhi = amrex::ubound(vbox); - const auto dlo = amrex::lbound(domain); - const auto dhi = amrex::ubound(domain); + bool xlo_domain = (vlo.x == dlo.x); + bool ylo_domain = (vlo.y == dlo.y); + bool zlo_domain = (vlo.z == dlo.z); + bool xhi_domain = (vhi.x == dhi.x); + bool yhi_domain = (vhi.y == dhi.y); + bool zhi_domain = (vhi.z == dhi.z); + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { switch (icorner) { case 0: { // xlo & ylo & zlo - Box bx = amrex::adjCellLo(amrex::adjCellLo(amrex::adjCellLo(vbox,xdir,1),ydir,1),zdir,1); - if (vlo.x == dlo.x && vlo.y == dlo.y && vlo.z == dlo.z) { - vel (vlo.x-1,vlo.y-1,vlo.z-1,icomp) - = vel(vlo.x-1,vlo.y ,vlo.z ,icomp) - + vel(vlo.x ,vlo.y-1,vlo.z ,icomp) - + vel(vlo.x ,vlo.y ,vlo.z-1,icomp) - - vel(vlo.x ,vlo.y ,vlo.z ,icomp) * Real(2.0); - } else if (vlo.x == dlo.x && vlo.y == dlo.y) { - vel (vlo.x-1,vlo.y-1,vlo.z-1,icomp) - = vel(vlo.x-1,vlo.y ,vlo.z-1,icomp) - + vel(vlo.x ,vlo.y-1,vlo.z-1,icomp) - - vel(vlo.x ,vlo.y ,vlo.z-1,icomp); - } else if (vlo.x == dlo.x && vlo.z == dlo.z) { - vel (vlo.x-1,vlo.y-1,vlo.z-1,icomp) - = vel(vlo.x-1,vlo.y-1,vlo.z ,icomp) - + vel(vlo.x ,vlo.y-1,vlo.z-1,icomp) - - vel(vlo.x ,vlo.y-1,vlo.z ,icomp); - } else if (vlo.y == dlo.y && vlo.z == dlo.z) { - vel (vlo.x-1,vlo.y-1,vlo.z-1,icomp) - = vel(vlo.x-1,vlo.y-1,vlo.z ,icomp) - + vel(vlo.x-1,vlo.y ,vlo.z-1,icomp) - - vel(vlo.x-1,vlo.y ,vlo.z ,icomp); - } else if (vlo.x == dlo.x) { - int offset = AMREX_SPACEDIM * oxlo; - mllinop_apply_bc_x(Orientation::low, bx, blen.x, - vel, mxlo, bct[offset+icomp], bcl[offset+icomp], - bcvalxlo, maxorder, dxinv[xdir], inhomog, icomp); - } else if (vlo.y == dlo.y) { - int offset = AMREX_SPACEDIM * oylo; - 
mllinop_apply_bc_y(Orientation::low, bx, blen.y, - vel, mylo, bct[offset+icomp], bcl[offset+icomp], - bcvalylo, maxorder, dxinv[ydir], inhomog, icomp); - } else if (vlo.z == dlo.z) { - int offset = AMREX_SPACEDIM * ozlo; - mllinop_apply_bc_z(Orientation::low, bx, blen.z, - vel, mzlo, bct[offset+icomp], bcl[offset+icomp], - bcvalzlo, maxorder, dxinv[zdir], inhomog, icomp); - } else if (mxlo(vlo.x-1,vlo.y-1,vlo.z-1) != BndryData::covered) { - if (mylo(vlo.x,vlo.y-1,vlo.z-1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxlo; - mllinop_apply_bc_x(Orientation::low, bx, blen.x, - vel, mxlo, bct[offset+icomp], bcl[offset+icomp], - bcvalxlo, maxorder, dxinv[xdir], inhomog, icomp); - } else if (mxlo(vlo.x-1,vlo.y,vlo.z-1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oylo; - mllinop_apply_bc_y(Orientation::low, bx, blen.y, - vel, mylo, bct[offset+icomp], bcl[offset+icomp], - bcvalylo, maxorder, dxinv[ydir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * ozlo; - mllinop_apply_bc_z(Orientation::low, bx, blen.z, - vel, mzlo, bct[offset+icomp], bcl[offset+icomp], - bcvalzlo, maxorder, dxinv[zdir], inhomog, icomp); + int i = vlo.x-1; + int j = vlo.y-1; + int k = vlo.z-1; + if (mxlo(i,j,k) != BndryData::covered && + (!xlo_domain || !ylo_domain || !zlo_domain)) { + bool x_interior = mylo(i+1,j ,k ) == BndryData::covered; + bool x_exterior = mylo(i+1,j ,k ) == BndryData::not_covered; + bool y_interior = mxlo(i ,j+1,k ) == BndryData::covered; + bool y_exterior = mxlo(i ,j+1,k ) == BndryData::not_covered; + bool z_interior = mxlo(i ,j ,k+1) == BndryData::covered; + bool z_exterior = mxlo(i ,j ,k+1) == BndryData::not_covered; + if ((x_interior && y_interior && z_interior) || + (x_exterior && y_exterior && z_exterior)) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + tmp += vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = Real(1./3.)*(tmp+vel(i,j,k,icomp)); + } else if (x_interior && y_interior) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (x_interior && z_interior) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (y_interior && z_interior) { + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + 
bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (x_interior) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + } else if (y_interior) { + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + } else if (z_interior) { + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); } } break; } case 1: { // xhi & ylo & zlo - Box bx = amrex::adjCellLo(amrex::adjCellLo(amrex::adjCellHi(vbox,xdir,1),ydir,1),zdir,1); - if (vhi.x == dhi.x && vlo.y == dlo.y && vlo.z == dlo.z) { - vel (vhi.x+1,vlo.y-1,vlo.z-1,icomp) - = vel(vhi.x+1,vlo.y ,vlo.z ,icomp) - + vel(vhi.x ,vlo.y-1,vlo.z ,icomp) - + vel(vhi.x ,vlo.y ,vlo.z-1,icomp) - - vel(vhi.x ,vlo.y ,vlo.z ,icomp) * Real(2.0); - } else if (vhi.x == dhi.x && vlo.y == dlo.y) { - vel (vhi.x+1,vlo.y-1,vlo.z-1,icomp) - = vel(vhi.x+1,vlo.y ,vlo.z-1,icomp) - + vel(vhi.x ,vlo.y-1,vlo.z-1,icomp) - - vel(vhi.x ,vlo.y ,vlo.z-1,icomp); - } else if (vhi.x == dhi.x && vlo.z == dlo.z) { - vel (vhi.x+1,vlo.y-1,vlo.z-1,icomp) - = vel(vhi.x+1,vlo.y-1,vlo.z ,icomp) - + vel(vhi.x ,vlo.y-1,vlo.z-1,icomp) - - vel(vhi.x ,vlo.y-1,vlo.z ,icomp); - } else if (vlo.y == dlo.y && vlo.z == dlo.z) { - vel (vhi.x+1,vlo.y-1,vlo.z-1,icomp) - = vel(vhi.x+1,vlo.y-1,vlo.z ,icomp) - + vel(vhi.x+1,vlo.y ,vlo.z-1,icomp) - - vel(vhi.x+1,vlo.y ,vlo.z ,icomp); - } else if (vhi.x == dhi.x) { - int offset = AMREX_SPACEDIM * oxhi; - mllinop_apply_bc_x(Orientation::high, bx, blen.x, - vel, mxhi, bct[offset+icomp], bcl[offset+icomp], - bcvalxhi, maxorder, dxinv[xdir], inhomog, icomp); - } else if (vlo.y == dlo.y) { - int offset = AMREX_SPACEDIM * oylo; - mllinop_apply_bc_y(Orientation::low, bx, blen.y, - vel, mylo, bct[offset+icomp], bcl[offset+icomp], - bcvalylo, maxorder, dxinv[ydir], inhomog, icomp); - } else if (vlo.z == dlo.z) { - int offset = AMREX_SPACEDIM * ozlo; - mllinop_apply_bc_z(Orientation::low, bx, blen.z, - vel, mzlo, bct[offset+icomp], bcl[offset+icomp], - bcvalzlo, maxorder, dxinv[zdir], inhomog, icomp); - } else if (mxhi(vhi.x+1,vlo.y-1,vlo.z-1) != BndryData::covered) { - if (mylo(vhi.x,vlo.y-1,vlo.z-1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxhi; - mllinop_apply_bc_x(Orientation::high, bx, blen.x, - vel, mxhi, bct[offset+icomp], bcl[offset+icomp], - bcvalxhi, maxorder, dxinv[xdir], inhomog, icomp); - } else if (mxhi(vhi.x+1,vlo.y,vlo.z-1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oylo; - mllinop_apply_bc_y(Orientation::low, bx, blen.y, - vel, mylo, bct[offset+icomp], bcl[offset+icomp], - bcvalylo, maxorder, dxinv[ydir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * ozlo; - mllinop_apply_bc_z(Orientation::low, bx, blen.z, - vel, mzlo, bct[offset+icomp], bcl[offset+icomp], - bcvalzlo, maxorder, dxinv[zdir], inhomog, icomp); + int i = vhi.x+1; + int j = vlo.y-1; + int k = vlo.z-1; + bool x_interior = mylo(i-1,j ,k ) == BndryData::covered; + bool x_exterior = 
mylo(i-1,j ,k ) == BndryData::not_covered; + bool y_interior = mxhi(i ,j+1,k ) == BndryData::covered; + bool y_exterior = mxhi(i ,j+1,k ) == BndryData::not_covered; + bool z_interior = mxhi(i ,j ,k+1) == BndryData::covered; + bool z_exterior = mxhi(i ,j ,k+1) == BndryData::not_covered; + if (mxhi(i,j,k) != BndryData::covered && + (!xhi_domain || !ylo_domain || !zlo_domain)) { + if ((x_interior && y_interior && z_interior) || + (x_exterior && y_exterior && z_exterior)) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + tmp += vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = Real(1./3.)*(tmp+vel(i,j,k,icomp)); + } else if (x_interior && y_interior) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (x_interior && z_interior) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (y_interior && z_interior) { + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (x_interior) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + } else if (y_interior) { + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + } else if (z_interior) { + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); } } break; } case 2: { // xlo & yhi & zlo - Box bx = amrex::adjCellLo(amrex::adjCellHi(amrex::adjCellLo(vbox,xdir,1),ydir,1),zdir,1); - if (vlo.x == dlo.x && vhi.y == dhi.y && vlo.z == dlo.z) { - vel (vlo.x-1,vhi.y+1,vlo.z-1,icomp) - = vel(vlo.x-1,vhi.y ,vlo.z ,icomp) - + vel(vlo.x ,vhi.y+1,vlo.z ,icomp) - + vel(vlo.x 
,vhi.y ,vlo.z-1,icomp) - - vel(vlo.x ,vhi.y ,vlo.z ,icomp) * Real(2.0); - } else if (vlo.x == dlo.x && vhi.y == dhi.y) { - vel (vlo.x-1,vhi.y+1,vlo.z-1,icomp) - = vel(vlo.x-1,vhi.y ,vlo.z-1,icomp) - + vel(vlo.x ,vhi.y+1,vlo.z-1,icomp) - - vel(vlo.x ,vhi.y ,vlo.z-1,icomp); - } else if (vlo.x == dlo.x && vlo.z == dlo.z) { - vel (vlo.x-1,vhi.y+1,vlo.z-1,icomp) - = vel(vlo.x-1,vhi.y+1,vlo.z ,icomp) - + vel(vlo.x ,vhi.y+1,vlo.z-1,icomp) - - vel(vlo.x ,vhi.y+1,vlo.z ,icomp); - } else if (vhi.y == dhi.y && vlo.z == dlo.z) { - vel (vlo.x-1,vhi.y+1,vlo.z-1,icomp) - = vel(vlo.x-1,vhi.y+1,vlo.z ,icomp) - + vel(vlo.x-1,vhi.y ,vlo.z-1,icomp) - - vel(vlo.x-1,vhi.y ,vlo.z ,icomp); - } else if (vlo.x == dlo.x) { - int offset = AMREX_SPACEDIM * oxlo; - mllinop_apply_bc_x(Orientation::low, bx, blen.x, - vel, mxlo, bct[offset+icomp], bcl[offset+icomp], - bcvalxlo, maxorder, dxinv[xdir], inhomog, icomp); - } else if (vhi.y == dhi.y) { - int offset = AMREX_SPACEDIM * oyhi; - mllinop_apply_bc_y(Orientation::high, bx, blen.y, - vel, myhi, bct[offset+icomp], bcl[offset+icomp], - bcvalyhi, maxorder, dxinv[ydir], inhomog, icomp); - } else if (vlo.z == dlo.z) { - int offset = AMREX_SPACEDIM * ozlo; - mllinop_apply_bc_z(Orientation::low, bx, blen.z, - vel, mzlo, bct[offset+icomp], bcl[offset+icomp], - bcvalzlo, maxorder, dxinv[zdir], inhomog, icomp); - } else if (mxlo(vlo.x-1,vhi.y+1,vlo.z-1) != BndryData::covered) { - if (myhi(vlo.x,vhi.y+1,vlo.z-1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxlo; - mllinop_apply_bc_x(Orientation::low, bx, blen.x, - vel, mxlo, bct[offset+icomp], bcl[offset+icomp], - bcvalxlo, maxorder, dxinv[xdir], inhomog, icomp); - } else if (mxlo(vlo.x-1,vhi.y,vlo.z-1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oyhi; - mllinop_apply_bc_y(Orientation::high, bx, blen.y, - vel, myhi, bct[offset+icomp], bcl[offset+icomp], - bcvalyhi, maxorder, dxinv[ydir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * ozlo; - mllinop_apply_bc_z(Orientation::low, bx, blen.z, - vel, mzlo, bct[offset+icomp], bcl[offset+icomp], - bcvalzlo, maxorder, dxinv[zdir], inhomog, icomp); + int i = vlo.x-1; + int j = vhi.y+1; + int k = vlo.z-1; + bool x_interior = myhi(i+1,j ,k ) == BndryData::covered; + bool x_exterior = myhi(i+1,j ,k ) == BndryData::not_covered; + bool y_interior = mxlo(i ,j-1,k ) == BndryData::covered; + bool y_exterior = mxlo(i ,j-1,k ) == BndryData::not_covered; + bool z_interior = mxlo(i ,j ,k+1) == BndryData::covered; + bool z_exterior = mxlo(i ,j ,k+1) == BndryData::not_covered; + if (mxlo(i,j,k) != BndryData::covered && + (!xlo_domain || !yhi_domain || !zlo_domain)) { + if ((x_interior && y_interior && z_interior) || + (x_exterior && y_exterior && z_exterior)) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + tmp += vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = Real(1./3.)*(tmp+vel(i,j,k,icomp)); + } else if (x_interior && y_interior) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + 
bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (x_interior && z_interior) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (y_interior && z_interior) { + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (x_interior) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + } else if (y_interior) { + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + } else if (z_interior) { + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); } } break; } case 3: { // xhi & yhi & zlo - Box bx = amrex::adjCellLo(amrex::adjCellHi(amrex::adjCellHi(vbox,xdir,1),ydir,1),zdir,1); - if (vhi.x == dhi.x && vhi.y == dhi.y && vlo.z == dlo.z) { - vel (vhi.x+1,vhi.y+1,vlo.z-1,icomp) - = vel(vhi.x+1,vhi.y ,vlo.z ,icomp) - + vel(vhi.x ,vhi.y+1,vlo.z ,icomp) - + vel(vhi.x ,vhi.y ,vlo.z-1,icomp) - - vel(vhi.x ,vhi.y ,vlo.z ,icomp) * Real(2.0); - } else if (vhi.x == dhi.x && vhi.y == dhi.y) { - vel (vhi.x+1,vhi.y+1,vlo.z-1,icomp) - = vel(vhi.x+1,vhi.y ,vlo.z-1,icomp) - + vel(vhi.x ,vhi.y+1,vlo.z-1,icomp) - - vel(vhi.x ,vhi.y ,vlo.z-1,icomp); - } else if (vhi.x == dhi.x && vlo.z == dlo.z) { - vel (vhi.x+1,vhi.y+1,vlo.z-1,icomp) - = vel(vhi.x+1,vhi.y+1,vlo.z ,icomp) - + vel(vhi.x ,vhi.y+1,vlo.z-1,icomp) - - vel(vhi.x ,vhi.y+1,vlo.z ,icomp); - } else if (vhi.y == dhi.y && vlo.z == dlo.z) { - vel (vhi.x+1,vhi.y+1,vlo.z-1,icomp) - = vel(vhi.x+1,vhi.y+1,vlo.z ,icomp) - + vel(vhi.x+1,vhi.y ,vlo.z-1,icomp) - - vel(vhi.x+1,vhi.y ,vlo.z ,icomp); - } else if (vhi.x == dhi.x) { - int offset = AMREX_SPACEDIM * oxhi; - mllinop_apply_bc_x(Orientation::high, bx, blen.x, - vel, mxhi, bct[offset+icomp], bcl[offset+icomp], - bcvalxhi, maxorder, dxinv[xdir], inhomog, icomp); - } else if (vhi.y == dhi.y) { - int offset = AMREX_SPACEDIM * oyhi; - mllinop_apply_bc_y(Orientation::high, bx, blen.y, - vel, myhi, bct[offset+icomp], bcl[offset+icomp], - bcvalyhi, maxorder, dxinv[ydir], inhomog, icomp); - } else if (vlo.z == dlo.z) { - int offset = AMREX_SPACEDIM * ozlo; - mllinop_apply_bc_z(Orientation::low, bx, blen.z, - vel, mzlo, bct[offset+icomp], 
bcl[offset+icomp], - bcvalzlo, maxorder, dxinv[zdir], inhomog, icomp); - } else if (mxhi(vhi.x+1,vhi.y+1,vlo.z-1) != BndryData::covered) { - if (myhi(vhi.x,vhi.y+1,vlo.z-1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxhi; - mllinop_apply_bc_x(Orientation::high, bx, blen.x, - vel, mxhi, bct[offset+icomp], bcl[offset+icomp], - bcvalxhi, maxorder, dxinv[xdir], inhomog, icomp); - } else if (mxhi(vhi.x+1,vhi.y,vlo.z-1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oyhi; - mllinop_apply_bc_y(Orientation::high, bx, blen.y, - vel, myhi, bct[offset+icomp], bcl[offset+icomp], - bcvalyhi, maxorder, dxinv[ydir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * ozlo; - mllinop_apply_bc_z(Orientation::low, bx, blen.z, - vel, mzlo, bct[offset+icomp], bcl[offset+icomp], - bcvalzlo, maxorder, dxinv[zdir], inhomog, icomp); + int i = vhi.x+1; + int j = vhi.y+1; + int k = vlo.z-1; + bool x_interior = myhi(i-1,j ,k ) == BndryData::covered; + bool x_exterior = myhi(i-1,j ,k ) == BndryData::not_covered; + bool y_interior = mxhi(i ,j-1,k ) == BndryData::covered; + bool y_exterior = mxhi(i ,j-1,k ) == BndryData::not_covered; + bool z_interior = mxhi(i ,j ,k+1) == BndryData::covered; + bool z_exterior = mxhi(i ,j ,k+1) == BndryData::not_covered; + if (mxhi(i,j,k) != BndryData::covered && + (!xhi_domain || !yhi_domain || !zlo_domain)) { + if ((x_interior && y_interior && z_interior) || + (x_exterior && y_exterior && z_exterior)) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + tmp += vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = Real(1./3.)*(tmp+vel(i,j,k,icomp)); + } else if (x_interior && y_interior) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (x_interior && z_interior) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (y_interior && z_interior) { + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + 
vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (x_interior) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + } else if (y_interior) { + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + } else if (z_interior) { + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); } } break; } case 4: { // xlo & ylo & zhi - Box bx = amrex::adjCellHi(amrex::adjCellLo(amrex::adjCellLo(vbox,xdir,1),ydir,1),zdir,1); - if (vlo.x == dlo.x && vlo.y == dlo.y && vhi.z == dhi.z) { - vel (vlo.x-1, vlo.y-1, vhi.z+1,icomp) - = vel(vlo.x-1, vlo.y , vhi.z ,icomp) - + vel(vlo.x , vlo.y-1, vhi.z ,icomp) - + vel(vlo.x , vlo.y , vhi.z+1,icomp) - - vel(vlo.x , vlo.y , vhi.z ,icomp) * Real(2.0); - } else if (vlo.x == dlo.x && vlo.y == dlo.y) { - vel (vlo.x-1, vlo.y-1, vhi.z+1,icomp) - = vel(vlo.x-1, vlo.y , vhi.z+1,icomp) - + vel(vlo.x , vlo.y-1, vhi.z+1,icomp) - - vel(vlo.x , vlo.y , vhi.z+1,icomp); - } else if (vlo.x == dlo.x && vhi.z == dhi.z) { - vel (vlo.x-1, vlo.y-1, vhi.z+1,icomp) - = vel(vlo.x-1, vlo.y-1, vhi.z ,icomp) - + vel(vlo.x , vlo.y-1, vhi.z+1,icomp) - - vel(vlo.x , vlo.y-1, vhi.z ,icomp); - } else if (vlo.y == dlo.y && vhi.z == dhi.z) { - vel (vlo.x-1, vlo.y-1, vhi.z+1,icomp) - = vel(vlo.x-1, vlo.y-1, vhi.z ,icomp) - + vel(vlo.x-1, vlo.y , vhi.z+1,icomp) - - vel(vlo.x-1, vlo.y , vhi.z ,icomp); - } else if (vlo.x == dlo.x) { - int offset = AMREX_SPACEDIM * oxlo; - mllinop_apply_bc_x(Orientation::low, bx, blen.x, - vel, mxlo, bct[offset+icomp], bcl[offset+icomp], - bcvalxlo, maxorder, dxinv[xdir], inhomog, icomp); - } else if (vlo.y == dlo.y) { - int offset = AMREX_SPACEDIM * oylo; - mllinop_apply_bc_y(Orientation::low, bx, blen.y, - vel, mylo, bct[offset+icomp], bcl[offset+icomp], - bcvalylo, maxorder, dxinv[ydir], inhomog, icomp); - } else if (vhi.z == dhi.z) { - int offset = AMREX_SPACEDIM * ozhi; - mllinop_apply_bc_z(Orientation::high, bx, blen.z, - vel, mzhi, bct[offset+icomp], bcl[offset+icomp], - bcvalzhi, maxorder, dxinv[zdir], inhomog, icomp); - } else if (mxlo(vlo.x-1,vlo.y-1,vhi.z+1) != BndryData::covered) { - if (mylo(vlo.x,vlo.y-1,vhi.z+1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxlo; - mllinop_apply_bc_x(Orientation::low, bx, blen.x, - vel, mxlo, bct[offset+icomp], bcl[offset+icomp], - bcvalxlo, maxorder, dxinv[xdir], inhomog, icomp); - } else if (mxlo(vlo.x-1,vlo.y,vhi.z+1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oylo; - mllinop_apply_bc_y(Orientation::low, bx, blen.y, - vel, mylo, bct[offset+icomp], bcl[offset+icomp], - bcvalylo, maxorder, dxinv[ydir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * ozhi; - mllinop_apply_bc_z(Orientation::high, bx, blen.z, - vel, mzhi, bct[offset+icomp], bcl[offset+icomp], - bcvalzhi, maxorder, dxinv[zdir], inhomog, icomp); + int i = vlo.x-1; + int j = vlo.y-1; + int k = vhi.z+1; + bool x_interior = mylo(i+1,j ,k ) == BndryData::covered; + bool x_exterior = mylo(i+1,j ,k ) == BndryData::not_covered; + bool y_interior = mxlo(i ,j+1,k ) == BndryData::covered; + bool y_exterior = mxlo(i ,j+1,k ) == BndryData::not_covered; + bool z_interior = mxlo(i ,j ,k-1) == BndryData::covered; + bool z_exterior = mxlo(i ,j ,k-1) 
== BndryData::not_covered; + if (mxlo(i,j,k) != BndryData::covered && + (!xlo_domain || !ylo_domain || !zhi_domain)) { + if ((x_interior && y_interior && z_interior) || + (x_exterior && y_exterior && z_exterior)) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + tmp += vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = Real(1./3.)*(tmp+vel(i,j,k,icomp)); + } else if (x_interior && y_interior) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (x_interior && z_interior) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (y_interior && z_interior) { + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (x_interior) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + } else if (y_interior) { + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + } else if (z_interior) { + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); } } break; } case 5: { // xhi & ylo & zhi - Box bx = amrex::adjCellHi(amrex::adjCellLo(amrex::adjCellHi(vbox,xdir,1),ydir,1),zdir,1); - if (vhi.x == dhi.x && vlo.y == dlo.y && vhi.z == dhi.z) { - vel (vhi.x+1,vlo.y-1,vhi.z+1,icomp) - = vel(vhi.x+1,vlo.y ,vhi.z ,icomp) - + vel(vhi.x ,vlo.y-1,vhi.z ,icomp) - + vel(vhi.x ,vlo.y ,vhi.z+1,icomp) - - vel(vhi.x ,vlo.y ,vhi.z ,icomp) * Real(2.0); - } else if (vhi.x == dhi.x && vlo.y == dlo.y) { - vel (vhi.x+1,vlo.y-1,vhi.z+1,icomp) - = vel(vhi.x+1,vlo.y ,vhi.z+1,icomp) - + vel(vhi.x ,vlo.y-1,vhi.z+1,icomp) - - vel(vhi.x ,vlo.y 
,vhi.z+1,icomp); - } else if (vhi.x == dhi.x && vhi.z == dhi.z) { - vel (vhi.x+1,vlo.y-1,vhi.z+1,icomp) - = vel(vhi.x+1,vlo.y-1,vhi.z ,icomp) - + vel(vhi.x ,vlo.y-1,vhi.z+1,icomp) - - vel(vhi.x ,vlo.y-1,vhi.z ,icomp); - } else if (vlo.y == dlo.y && vhi.z == dhi.z) { - vel (vhi.x+1,vlo.y-1,vhi.z+1,icomp) - = vel(vhi.x+1,vlo.y-1,vhi.z ,icomp) - + vel(vhi.x+1,vlo.y ,vhi.z+1,icomp) - - vel(vhi.x+1,vlo.y ,vhi.z ,icomp); - } else if (vhi.x == dhi.x) { - int offset = AMREX_SPACEDIM * oxhi; - mllinop_apply_bc_x(Orientation::high, bx, blen.x, - vel, mxhi, bct[offset+icomp], bcl[offset+icomp], - bcvalxhi, maxorder, dxinv[xdir], inhomog, icomp); - } else if (vlo.y == dlo.y) { - int offset = AMREX_SPACEDIM * oylo; - mllinop_apply_bc_y(Orientation::low, bx, blen.y, - vel, mylo, bct[offset+icomp], bcl[offset+icomp], - bcvalylo, maxorder, dxinv[ydir], inhomog, icomp); - } else if (vhi.z == dhi.z) { - int offset = AMREX_SPACEDIM * ozhi; - mllinop_apply_bc_z(Orientation::high, bx, blen.z, - vel, mzhi, bct[offset+icomp], bcl[offset+icomp], - bcvalzhi, maxorder, dxinv[zdir], inhomog, icomp); - } else if (mxhi(vhi.x+1,vlo.y-1,vhi.z+1) != BndryData::covered) { - if (mylo(vhi.x,vlo.y-1,vhi.z+1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxhi; - mllinop_apply_bc_x(Orientation::high, bx, blen.x, - vel, mxhi, bct[offset+icomp], bcl[offset+icomp], - bcvalxhi, maxorder, dxinv[xdir], inhomog, icomp); - } else if (mxhi(vhi.x+1,vlo.y,vhi.z+1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oylo; - mllinop_apply_bc_y(Orientation::low, bx, blen.y, - vel, mylo, bct[offset+icomp], bcl[offset+icomp], - bcvalylo, maxorder, dxinv[ydir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * ozhi; - mllinop_apply_bc_z(Orientation::high, bx, blen.z, - vel, mzhi, bct[offset+icomp], bcl[offset+icomp], - bcvalzhi, maxorder, dxinv[zdir], inhomog, icomp); + int i = vhi.x+1; + int j = vlo.y-1; + int k = vhi.z+1; + bool x_interior = mylo(i-1,j ,k ) == BndryData::covered; + bool x_exterior = mylo(i-1,j ,k ) == BndryData::not_covered; + bool y_interior = mxhi(i ,j+1,k ) == BndryData::covered; + bool y_exterior = mxhi(i ,j+1,k ) == BndryData::not_covered; + bool z_interior = mxhi(i ,j ,k-1) == BndryData::covered; + bool z_exterior = mxhi(i ,j ,k-1) == BndryData::not_covered; + if (mxhi(i,j,k) != BndryData::covered && + (!xhi_domain || !ylo_domain || !zhi_domain)) { + if ((x_interior && y_interior && z_interior) || + (x_exterior && y_exterior && z_exterior)) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + tmp += vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = Real(1./3.)*(tmp+vel(i,j,k,icomp)); + } else if (x_interior && y_interior) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, 
maxorder, dxinv[1], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (x_interior && z_interior) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (y_interior && z_interior) { + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (x_interior) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + } else if (y_interior) { + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + } else if (z_interior) { + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); } } break; } case 6: { // xlo & yhi & zhi - Box bx = amrex::adjCellHi(amrex::adjCellHi(amrex::adjCellLo(vbox,xdir,1),ydir,1),zdir,1); - if (vlo.x == dlo.x && vhi.y == dhi.y && vhi.z == dhi.z) { - vel (vlo.x-1,vhi.y+1,vhi.z+1,icomp) - = vel(vlo.x-1,vhi.y ,vhi.z ,icomp) - + vel(vlo.x ,vhi.y+1,vhi.z ,icomp) - + vel(vlo.x ,vhi.y ,vhi.z+1,icomp) - - vel(vlo.x ,vhi.y ,vhi.z ,icomp) * Real(2.0); - } else if (vlo.x == dlo.x && vhi.y == dhi.y) { - vel (vlo.x-1,vhi.y+1,vhi.z+1,icomp) - = vel(vlo.x-1,vhi.y ,vhi.z+1,icomp) - + vel(vlo.x ,vhi.y+1,vhi.z+1,icomp) - - vel(vlo.x ,vhi.y ,vhi.z+1,icomp); - } else if (vlo.x == dlo.x && vhi.z == dhi.z) { - vel (vlo.x-1,vhi.y+1,vhi.z+1,icomp) - = vel(vlo.x-1,vhi.y+1,vhi.z ,icomp) - + vel(vlo.x ,vhi.y+1,vhi.z+1,icomp) - - vel(vlo.x ,vhi.y+1,vhi.z ,icomp); - } else if (vhi.y == dhi.y && vhi.z == dhi.z) { - vel (vlo.x-1,vhi.y+1,vhi.z+1,icomp) - = vel(vlo.x-1,vhi.y+1,vhi.z ,icomp) - + vel(vlo.x-1,vhi.y ,vhi.z+1,icomp) - - vel(vlo.x-1,vhi.y ,vhi.z ,icomp); - } else if (vlo.x == dlo.x) { - int offset = AMREX_SPACEDIM * oxlo; - mllinop_apply_bc_x(Orientation::low, bx, blen.x, - vel, mxlo, bct[offset+icomp], bcl[offset+icomp], - bcvalxlo, maxorder, dxinv[xdir], inhomog, icomp); - } else if (vhi.y == dhi.y) { - int offset = AMREX_SPACEDIM * oyhi; - mllinop_apply_bc_y(Orientation::high, bx, blen.y, - vel, myhi, bct[offset+icomp], bcl[offset+icomp], - bcvalyhi, maxorder, dxinv[ydir], inhomog, icomp); - } else if (vhi.z == dhi.z) { - int offset = AMREX_SPACEDIM * ozhi; - mllinop_apply_bc_z(Orientation::high, bx, blen.z, - vel, mzhi, bct[offset+icomp], bcl[offset+icomp], - bcvalzhi, maxorder, dxinv[zdir], inhomog, icomp); - } else if (mxlo(vlo.x-1,vhi.y+1,vhi.z+1) != BndryData::covered) { - if (myhi(vlo.x,vhi.y+1,vhi.z+1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxlo; - 
mllinop_apply_bc_x(Orientation::low, bx, blen.x, - vel, mxlo, bct[offset+icomp], bcl[offset+icomp], - bcvalxlo, maxorder, dxinv[xdir], inhomog, icomp); - } else if (mxlo(vlo.x-1,vhi.y,vhi.z+1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oyhi; - mllinop_apply_bc_y(Orientation::high, bx, blen.y, - vel, myhi, bct[offset+icomp], bcl[offset+icomp], - bcvalyhi, maxorder, dxinv[ydir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * ozhi; - mllinop_apply_bc_z(Orientation::high, bx, blen.z, - vel, mzhi, bct[offset+icomp], bcl[offset+icomp], - bcvalzhi, maxorder, dxinv[zdir], inhomog, icomp); + int i = vlo.x-1; + int j = vhi.y+1; + int k = vhi.z+1; + bool x_interior = myhi(i+1,j ,k ) == BndryData::covered; + bool x_exterior = myhi(i+1,j ,k ) == BndryData::not_covered; + bool y_interior = mxlo(i ,j-1,k ) == BndryData::covered; + bool y_exterior = mxlo(i ,j-1,k ) == BndryData::not_covered; + bool z_interior = mxlo(i ,j ,k-1) == BndryData::covered; + bool z_exterior = mxlo(i ,j ,k-1) == BndryData::not_covered; + if (mxlo(i,j,k) != BndryData::covered && + (!xlo_domain || !yhi_domain || !zhi_domain)) { + if ((x_interior && y_interior && z_interior) || + (x_exterior && y_exterior && z_exterior)) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + tmp += vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = Real(1./3.)*(tmp+vel(i,j,k,icomp)); + } else if (x_interior && y_interior) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (x_interior && z_interior) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (y_interior && z_interior) { + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (x_interior) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], 
inhomog, icomp); + } else if (y_interior) { + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + } else if (z_interior) { + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); } } break; } case 7: { // xhi & yhi & zhi - Box bx = amrex::adjCellHi(amrex::adjCellHi(amrex::adjCellHi(vbox,xdir,1),ydir,1),zdir,1); - if (vhi.x == dhi.x && vhi.y == dhi.y && vhi.z == dhi.z) { - vel (vhi.x+1,vhi.y+1,vhi.z+1,icomp) - = vel(vhi.x+1,vhi.y ,vhi.z ,icomp) - + vel(vhi.x ,vhi.y+1,vhi.z ,icomp) - + vel(vhi.x ,vhi.y ,vhi.z+1,icomp) - - vel(vhi.x ,vhi.y ,vhi.z ,icomp) * Real(2.0); - } else if (vhi.x == dhi.x && vhi.y == dhi.y) { - vel (vhi.x+1,vhi.y+1,vhi.z+1,icomp) - = vel(vhi.x+1,vhi.y ,vhi.z+1,icomp) - + vel(vhi.x ,vhi.y+1,vhi.z+1,icomp) - - vel(vhi.x ,vhi.y ,vhi.z+1,icomp); - } else if (vhi.x == dhi.x && vhi.z == dhi.z) { - vel (vhi.x+1,vhi.y+1,vhi.z+1,icomp) - = vel(vhi.x+1,vhi.y+1,vhi.z ,icomp) - + vel(vhi.x ,vhi.y+1,vhi.z+1,icomp) - - vel(vhi.x ,vhi.y+1,vhi.z ,icomp); - } else if (vhi.y == dhi.y && vhi.z == dhi.z) { - vel (vhi.x+1,vhi.y+1,vhi.z+1,icomp) - = vel(vhi.x+1,vhi.y+1,vhi.z ,icomp) - + vel(vhi.x+1,vhi.y ,vhi.z+1,icomp) - - vel(vhi.x+1,vhi.y ,vhi.z ,icomp); - } else if (vhi.x == dhi.x) { - int offset = AMREX_SPACEDIM * oxhi; - mllinop_apply_bc_x(Orientation::high, bx, blen.x, - vel, mxhi, bct[offset+icomp], bcl[offset+icomp], - bcvalxhi, maxorder, dxinv[xdir], inhomog, icomp); - } else if (vhi.y == dhi.y) { - int offset = AMREX_SPACEDIM * oyhi; - mllinop_apply_bc_y(Orientation::high, bx, blen.y, - vel, myhi, bct[offset+icomp], bcl[offset+icomp], - bcvalyhi, maxorder, dxinv[ydir], inhomog, icomp); - } else if (vhi.z == dhi.z) { - int offset = AMREX_SPACEDIM * ozhi; - mllinop_apply_bc_z(Orientation::high, bx, blen.z, - vel, mzhi, bct[offset+icomp], bcl[offset+icomp], - bcvalzhi, maxorder, dxinv[zdir], inhomog, icomp); - } else if (mxhi(vhi.x+1,vhi.y+1,vhi.z+1) != BndryData::covered) { - if (myhi(vhi.x,vhi.y+1,vhi.z+1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxhi; - mllinop_apply_bc_x(Orientation::high, bx, blen.x, - vel, mxhi, bct[offset+icomp], bcl[offset+icomp], - bcvalxhi, maxorder, dxinv[xdir], inhomog, icomp); - } else if (mxhi(vhi.x+1,vhi.y,vhi.z+1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oyhi; - mllinop_apply_bc_y(Orientation::high, bx, blen.y, - vel, myhi, bct[offset+icomp], bcl[offset+icomp], - bcvalyhi, maxorder, dxinv[ydir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * ozhi; - mllinop_apply_bc_z(Orientation::high, bx, blen.z, - vel, mzhi, bct[offset+icomp], bcl[offset+icomp], - bcvalzhi, maxorder, dxinv[zdir], inhomog, icomp); + int i = vhi.x+1; + int j = vhi.y+1; + int k = vhi.z+1; + bool x_interior = myhi(i-1,j ,k ) == BndryData::covered; + bool x_exterior = myhi(i-1,j ,k ) == BndryData::not_covered; + bool y_interior = mxhi(i ,j-1,k ) == BndryData::covered; + bool y_exterior = mxhi(i ,j-1,k ) == BndryData::not_covered; + bool z_interior = mxhi(i ,j ,k-1) == BndryData::covered; + bool z_exterior = mxhi(i ,j ,k-1) == BndryData::not_covered; + if (mxhi(i,j,k) != BndryData::covered && + (!xhi_domain || !yhi_domain || !zhi_domain)) { + if ((x_interior && y_interior && z_interior) || + (x_exterior && y_exterior && z_exterior)) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, 
vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + tmp += vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = Real(1./3.)*(tmp+vel(i,j,k,icomp)); + } else if (x_interior && y_interior) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (x_interior && z_interior) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (y_interior && z_interior) { + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (x_interior) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + } else if (y_interior) { + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + } else if (z_interior) { + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); } } break; @@ -518,9 +1340,10 @@ void mltensor_fill_corners (int icorner, Box const& vbox, // vbox: the valid box } } } +#endif -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void mltensor_fill_edges (int iedge, Box const& vbox, // vbox: the valid box +inline +void mltensor_fill_edges (Box const& vbox, // vbox: the valid box Array4 const& vel, Array4 const& mxlo, Array4 const& mylo, @@ -534,522 +1357,486 @@ void mltensor_fill_edges (int iedge, Box const& vbox, // vbox: the valid box Array4 const& bcvalxhi, Array4 const& bcvalyhi, Array4 const& bcvalzhi, - GpuArray const& bct, - GpuArray const& bcl, + Array2D const& bct, + Array2D const& bcl, int inhomog, int maxorder, - GpuArray const& dxinv, Box const& domain) noexcept + GpuArray const& dxinv, + Dim3 const& dlo, Dim3 const& dhi) noexcept + { - constexpr int oxlo = 0; - constexpr int 
oylo = 1; - constexpr int ozlo = 2; - constexpr int oxhi = 3; - constexpr int oyhi = 4; - constexpr int ozhi = 5; - constexpr int xdir = 0; - constexpr int ydir = 1; - constexpr int zdir = 2; const auto blen = amrex::length(vbox); const auto vlo = amrex::lbound(vbox); const auto vhi = amrex::ubound(vbox); - const auto dlo = amrex::lbound(domain); - const auto dhi = amrex::ubound(domain); - for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { - switch (iedge) { - case 0: { - // xlo & ylo - if (vlo.x == dlo.x && vlo.y == dlo.y) { - for (int k = vlo.z; k <= vhi.z; ++k) { - vel (vlo.x-1,vlo.y-1,k,icomp) - = vel(vlo.x ,vlo.y-1,k,icomp) - + vel(vlo.x-1,vlo.y ,k,icomp) - - vel(vlo.x ,vlo.y ,k,icomp); - } - } else if (vlo.x == dlo.x) { - Box bx = amrex::adjCellLo(amrex::adjCellLo(vbox,xdir,1),ydir,1); - int offset = AMREX_SPACEDIM * oxlo; - mllinop_apply_bc_x(Orientation::low, bx, blen.x, - vel, mxlo, bct[offset+icomp], bcl[offset+icomp], - bcvalxlo, maxorder, dxinv[xdir], inhomog, icomp); - } else if (vlo.y == dlo.y) { - Box bx = amrex::adjCellLo(amrex::adjCellLo(vbox,xdir,1),ydir,1); - int offset = AMREX_SPACEDIM * oylo; - mllinop_apply_bc_y(Orientation::low, bx, blen.y, - vel, mylo, bct[offset+icomp], bcl[offset+icomp], - bcvalylo, maxorder, dxinv[ydir], inhomog, icomp); - } else { - for (int k = vlo.z; k <= vhi.z; ++k) { - if (mxlo(vlo.x-1,vlo.y-1,k) != BndryData::covered) { - Box bx(IntVect(vlo.x-1,vlo.y-1,k),IntVect(vlo.x-1,vlo.y-1,k)); - if (mylo(vlo.x,vlo.y-1,k) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxlo; - mllinop_apply_bc_x(Orientation::low, bx, blen.x, - vel, mxlo, bct[offset+icomp], bcl[offset+icomp], - bcvalxlo, maxorder, dxinv[xdir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * oylo; - mllinop_apply_bc_y(Orientation::low, bx, blen.y, - vel, mylo, bct[offset+icomp], bcl[offset+icomp], - bcvalylo, maxorder, dxinv[ydir], inhomog, icomp); - } - } - } - } - break; + bool xlo_domain = (vlo.x == dlo.x); + bool ylo_domain = (vlo.y == dlo.y); + bool zlo_domain = (vlo.z == dlo.z); + bool xhi_domain = (vhi.x == dhi.x); + bool yhi_domain = (vhi.y == dhi.y); + bool zhi_domain = (vhi.z == dhi.z); + + for (int k = vlo.z; k <= vhi.z; ++k) { + mltensor_fill_edges_xlo_ylo(vlo.x-1, vlo.y-1, k, blen, vel, mxlo, mylo, bcvalxlo, bcvalylo, + bct, bcl, inhomog, maxorder, dxinv, xlo_domain, ylo_domain); + mltensor_fill_edges_xhi_ylo(vhi.x+1, vlo.y-1, k, blen, vel, mxhi, mylo, bcvalxhi, bcvalylo, + bct, bcl, inhomog, maxorder, dxinv, xhi_domain, ylo_domain); + mltensor_fill_edges_xlo_yhi(vlo.x-1, vhi.y+1, k, blen, vel, mxlo, myhi, bcvalxlo, bcvalyhi, + bct, bcl, inhomog, maxorder, dxinv, xlo_domain, yhi_domain); + mltensor_fill_edges_xhi_yhi(vhi.x+1, vhi.y+1, k, blen, vel, mxhi, myhi, bcvalxhi, bcvalyhi, + bct, bcl, inhomog, maxorder, dxinv, xhi_domain, yhi_domain); + } + + for (int j = vlo.y; j <= vhi.y; ++j) { + mltensor_fill_edges_xlo_zlo(vlo.x-1, j, vlo.z-1, blen, vel, mxlo, mzlo, bcvalxlo, bcvalzlo, + bct, bcl, inhomog, maxorder, dxinv, xlo_domain, zlo_domain); + mltensor_fill_edges_xhi_zlo(vhi.x+1, j, vlo.z-1, blen, vel, mxhi, mzlo, bcvalxhi, bcvalzlo, + bct, bcl, inhomog, maxorder, dxinv, xhi_domain, zlo_domain); + mltensor_fill_edges_xlo_zhi(vlo.x-1, j, vhi.z+1, blen, vel, mxlo, mzhi, bcvalxlo, bcvalzhi, + bct, bcl, inhomog, maxorder, dxinv, xlo_domain, zhi_domain); + mltensor_fill_edges_xhi_zhi(vhi.x+1, j, vhi.z+1, blen, vel, mxhi, mzhi, bcvalxhi, bcvalzhi, + bct, bcl, inhomog, maxorder, dxinv, xhi_domain, zhi_domain); + } + + for (int i = vlo.x; i <= vhi.x; ++i) { 
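+            // Editor's note: the y-z edges below stride along x. Each
+            // mltensor_fill_edges_* helper (defined earlier in this diff) is
+            // assumed to mirror the corner logic above: apply the one-sided
+            // mllinop_apply_bc_* stencil for each transverse side whose mask
+            // is covered (interior), averaging the two results with weight
+            // 0.5 when both apply.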
+ mltensor_fill_edges_ylo_zlo(i, vlo.y-1, vlo.z-1, blen, vel, mylo, mzlo, bcvalylo, bcvalzlo, + bct, bcl, inhomog, maxorder, dxinv, ylo_domain, zlo_domain); + mltensor_fill_edges_yhi_zlo(i, vhi.y+1, vlo.z-1, blen, vel, myhi, mzlo, bcvalyhi, bcvalzlo, + bct, bcl, inhomog, maxorder, dxinv, yhi_domain, zlo_domain); + mltensor_fill_edges_ylo_zhi(i, vlo.y-1, vhi.z+1, blen, vel, mylo, mzhi, bcvalylo, bcvalzhi, + bct, bcl, inhomog, maxorder, dxinv, ylo_domain, zhi_domain); + mltensor_fill_edges_yhi_zhi(i, vhi.y+1, vhi.z+1, blen, vel, myhi, mzhi, bcvalyhi, bcvalzhi, + bct, bcl, inhomog, maxorder, dxinv, yhi_domain, zhi_domain); + } +} + +#ifdef AMREX_USE_GPU +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +void mltensor_fill_edges (int const bid, int const tid, int const bdim, + Box const& vbox, // vbox: the valid box + Array4 const& vel, + Array4 const& mxlo, + Array4 const& mylo, + Array4 const& mzlo, + Array4 const& mxhi, + Array4 const& myhi, + Array4 const& mzhi, + Array4 const& bcvalxlo, + Array4 const& bcvalylo, + Array4 const& bcvalzlo, + Array4 const& bcvalxhi, + Array4 const& bcvalyhi, + Array4 const& bcvalzhi, + Array2D const& bct, + Array2D const& bcl, + int inhomog, int maxorder, + GpuArray const& dxinv, + Dim3 const& dlo, Dim3 const& dhi) noexcept +{ + const auto blen = amrex::length(vbox); + const auto vlo = amrex::lbound(vbox); + const auto vhi = amrex::ubound(vbox); + bool xlo_domain = (vlo.x == dlo.x); + bool ylo_domain = (vlo.y == dlo.y); + bool zlo_domain = (vlo.z == dlo.z); + bool xhi_domain = (vhi.x == dhi.x); + bool yhi_domain = (vhi.y == dhi.y); + bool zhi_domain = (vhi.z == dhi.z); + if (bid == 0) { + for (int k = vlo.z + tid; k <= vhi.z; k += bdim) { + mltensor_fill_edges_xlo_ylo(vlo.x-1, vlo.y-1, k, blen, vel, mxlo, mylo, bcvalxlo, bcvalylo, + bct, bcl, inhomog, maxorder, dxinv, xlo_domain, ylo_domain); } - case 1: { - // xhi & ylo - if (vhi.x == dhi.x && vlo.y == dlo.y) { - for (int k = vlo.z; k <= vhi.z; ++k) { - vel (vhi.x+1,vlo.y-1,k,icomp) - = vel(vhi.x ,vlo.y-1,k,icomp) - + vel(vhi.x+1,vlo.y ,k,icomp) - - vel(vhi.x ,vlo.y ,k,icomp); - } - } else if (vhi.x == dhi.x) { - Box bx = amrex::adjCellLo(amrex::adjCellHi(vbox,xdir,1),ydir,1); - int offset = AMREX_SPACEDIM * oxhi; - mllinop_apply_bc_x(Orientation::high, bx, blen.x, - vel, mxhi, bct[offset+icomp], bcl[offset+icomp], - bcvalxhi, maxorder, dxinv[xdir], inhomog, icomp); - } else if (vlo.y == dlo.y) { - Box bx = amrex::adjCellLo(amrex::adjCellHi(vbox,xdir,1),ydir,1); - int offset = AMREX_SPACEDIM * oylo; - mllinop_apply_bc_y(Orientation::low, bx, blen.y, - vel, mylo, bct[offset+icomp], bcl[offset+icomp], - bcvalylo, maxorder, dxinv[ydir], inhomog, icomp); - } else { - for (int k = vlo.z; k <= vhi.z; ++k) { - if (mxhi(vhi.x+1,vlo.y-1,k) != BndryData::covered) { - Box bx(IntVect(vhi.x+1,vlo.y-1,k),IntVect(vhi.x+1,vlo.y-1,k)); - if (mylo(vhi.x,vlo.y-1,k) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxhi; - mllinop_apply_bc_x(Orientation::high, bx, blen.x, - vel, mxhi, bct[offset+icomp], bcl[offset+icomp], - bcvalxhi, maxorder, dxinv[xdir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * oylo; - mllinop_apply_bc_y(Orientation::low, bx, blen.y, - vel, mylo, bct[offset+icomp], bcl[offset+icomp], - bcvalylo, maxorder, dxinv[ydir], inhomog, icomp); - } - } - } + } else if (bid == 1) { + for (int k = vlo.z + tid; k <= vhi.z; k += bdim) { + mltensor_fill_edges_xhi_ylo(vhi.x+1, vlo.y-1, k, blen, vel, mxhi, mylo, bcvalxhi, bcvalylo, + bct, bcl, inhomog, maxorder, dxinv, xhi_domain, ylo_domain); + } + } else if 
(bid == 2) { + for (int k = vlo.z + tid; k <= vhi.z; k += bdim) { + mltensor_fill_edges_xlo_yhi(vlo.x-1, vhi.y+1, k, blen, vel, mxlo, myhi, bcvalxlo, bcvalyhi, + bct, bcl, inhomog, maxorder, dxinv, xlo_domain, yhi_domain); + } + } else if (bid == 3) { + for (int k = vlo.z + tid; k <= vhi.z; k += bdim) { + mltensor_fill_edges_xhi_yhi(vhi.x+1, vhi.y+1, k, blen, vel, mxhi, myhi, bcvalxhi, bcvalyhi, + bct, bcl, inhomog, maxorder, dxinv, xhi_domain, yhi_domain); + } + } else if (bid == 4) { + for (int j = vlo.y + tid; j <= vhi.y; j += bdim) { + mltensor_fill_edges_xlo_zlo(vlo.x-1, j, vlo.z-1, blen, vel, mxlo, mzlo, bcvalxlo, bcvalzlo, + bct, bcl, inhomog, maxorder, dxinv, xlo_domain, zlo_domain); + } + } else if (bid == 5) { + for (int j = vlo.y + tid; j <= vhi.y; j += bdim) { + mltensor_fill_edges_xhi_zlo(vhi.x+1, j, vlo.z-1, blen, vel, mxhi, mzlo, bcvalxhi, bcvalzlo, + bct, bcl, inhomog, maxorder, dxinv, xhi_domain, zlo_domain); + } + } else if (bid == 6) { + for (int j = vlo.y + tid; j <= vhi.y; j += bdim) { + mltensor_fill_edges_xlo_zhi(vlo.x-1, j, vhi.z+1, blen, vel, mxlo, mzhi, bcvalxlo, bcvalzhi, + bct, bcl, inhomog, maxorder, dxinv, xlo_domain, zhi_domain); + } + } else if (bid == 7) { + for (int j = vlo.y + tid; j <= vhi.y; j += bdim) { + mltensor_fill_edges_xhi_zhi(vhi.x+1, j, vhi.z+1, blen, vel, mxhi, mzhi, bcvalxhi, bcvalzhi, + bct, bcl, inhomog, maxorder, dxinv, xhi_domain, zhi_domain); + } + } else if (bid == 8) { + for (int i = vlo.x + tid; i <= vhi.x; i += bdim) { + mltensor_fill_edges_ylo_zlo(i, vlo.y-1, vlo.z-1, blen, vel, mylo, mzlo, bcvalylo, bcvalzlo, + bct, bcl, inhomog, maxorder, dxinv, ylo_domain, zlo_domain); + } + } else if (bid == 9) { + for (int i = vlo.x + tid; i <= vhi.x; i += bdim) { + mltensor_fill_edges_yhi_zlo(i, vhi.y+1, vlo.z-1, blen, vel, myhi, mzlo, bcvalyhi, bcvalzlo, + bct, bcl, inhomog, maxorder, dxinv, yhi_domain, zlo_domain); + } + } else if (bid == 10) { + for (int i = vlo.x + tid; i <= vhi.x; i += bdim) { + mltensor_fill_edges_ylo_zhi(i, vlo.y-1, vhi.z+1, blen, vel, mylo, mzhi, bcvalylo, bcvalzhi, + bct, bcl, inhomog, maxorder, dxinv, ylo_domain, zhi_domain); + } + } else if (bid == 11) { + for (int i = vlo.x + tid; i <= vhi.x; i += bdim) { + mltensor_fill_edges_yhi_zhi(i, vhi.y+1, vhi.z+1, blen, vel, myhi, mzhi, bcvalyhi, bcvalzhi, + bct, bcl, inhomog, maxorder, dxinv, yhi_domain, zhi_domain); + } + } +} +#endif + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mltensor_dz_on_xface (int i, int j, int k, int n, Array4 const& vel, Real dzi) noexcept +{ + return (vel(i,j,k+1,n)+vel(i-1,j,k+1,n)-vel(i,j,k-1,n)-vel(i-1,j,k-1,n))*(Real(0.25)*dzi); +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mltensor_dz_on_yface (int i, int j, int k, int n, Array4 const& vel, Real dzi) noexcept +{ + return (vel(i,j,k+1,n)+vel(i,j-1,k+1,n)-vel(i,j,k-1,n)-vel(i,j-1,k-1,n))*(Real(0.25)*dzi); +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mltensor_dx_on_zface (int i, int j, int k, int n, Array4 const& vel, Real dxi) noexcept +{ + return (vel(i+1,j,k,n)+vel(i+1,j,k-1,n)-vel(i-1,j,k,n)-vel(i-1,j,k-1,n))*(Real(0.25)*dxi); +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mltensor_dy_on_zface (int i, int j, int k, int n, Array4 const& vel, Real dyi) noexcept +{ + return (vel(i,j+1,k,n)+vel(i,j+1,k-1,n)-vel(i,j-1,k,n)-vel(i,j-1,k-1,n))*(Real(0.25)*dyi); +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_cross_terms_fx (Box const& box, Array4 const& fx, + Array4 const& vel, + Array4 const& etax, + Array4 const& kapx, + GpuArray const& dxinv) noexcept +{ 
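+    // Editor's note: only the transverse derivatives (d/dy and d/dz) enter
+    // the x-face cross terms below; the normal d/dx part of the flux is not
+    // computed here. The arithmetic below implies etax component 0 stores
+    // (4./3.)*eta + kappa, since mun = 0.75*(etax(i,j,k,0)-kappa) recovers
+    // the original eta; component 1 carries eta itself (mut).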
+ const Real dyi = dxinv[1]; + const Real dzi = dxinv[2]; + const auto lo = amrex::lbound(box); + const auto hi = amrex::ubound(box); + constexpr Real twoThirds = Real(2./3.); + + for (int k = lo.z; k <= hi.z; ++k) { + for (int j = lo.y; j <= hi.y; ++j) { + AMREX_PRAGMA_SIMD + for (int i = lo.x; i <= hi.x; ++i) { + Real dudy = mltensor_dy_on_xface(i,j,k,0,vel,dyi); + Real dvdy = mltensor_dy_on_xface(i,j,k,1,vel,dyi); + Real dudz = mltensor_dz_on_xface(i,j,k,0,vel,dzi); + Real dwdz = mltensor_dz_on_xface(i,j,k,2,vel,dzi); + Real divu = dvdy + dwdz; + Real xif = kapx(i,j,k); + Real mun = Real(0.75)*(etax(i,j,k,0)-xif); // restore the original eta + Real mut = etax(i,j,k,1); + fx(i,j,k,0) = -mun*(-twoThirds*divu) - xif*divu; + fx(i,j,k,1) = -mut*(dudy); + fx(i,j,k,2) = -mut*(dudz); } - break; } - case 2: { - // xlo & yhi - if (vlo.x == dlo.x && vhi.y == dhi.y) { - for (int k = vlo.z; k <= vhi.z; ++k) { - vel (vlo.x-1,vhi.y+1,k,icomp) - = vel(vlo.x ,vhi.y+1,k,icomp) - + vel(vlo.x-1,vhi.y ,k,icomp) - - vel(vlo.x ,vhi.y ,k,icomp); - } - } else if (vlo.x == dlo.x) { - Box bx = amrex::adjCellHi(amrex::adjCellLo(vbox,xdir,1),ydir,1); - int offset = AMREX_SPACEDIM * oxlo; - mllinop_apply_bc_x(Orientation::low, bx, blen.x, - vel, mxlo, bct[offset+icomp], bcl[offset+icomp], - bcvalxlo, maxorder, dxinv[xdir], inhomog, icomp); - } else if (vhi.y == dhi.y) { - Box bx = amrex::adjCellHi(amrex::adjCellLo(vbox,xdir,1),ydir,1); - int offset = AMREX_SPACEDIM * oyhi; - mllinop_apply_bc_y(Orientation::high, bx, blen.y, - vel, myhi, bct[offset+icomp], bcl[offset+icomp], - bcvalyhi, maxorder, dxinv[ydir], inhomog, icomp); - } else { - for (int k = vlo.z; k <= vhi.z; ++k) { - if (mxlo(vlo.x-1,vhi.y+1,k) != BndryData::covered) { - Box bx(IntVect(vlo.x-1,vhi.y+1,k),IntVect(vlo.x-1,vhi.y+1,k)); - if (myhi(vlo.x,vhi.y+1,k) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxlo; - mllinop_apply_bc_x(Orientation::low, bx, blen.x, - vel, mxlo, bct[offset+icomp], bcl[offset+icomp], - bcvalxlo, maxorder, dxinv[xdir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * oyhi; - mllinop_apply_bc_y(Orientation::high, bx, blen.y, - vel, myhi, bct[offset+icomp], bcl[offset+icomp], - bcvalyhi, maxorder, dxinv[ydir], inhomog, icomp); - } - } - } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_cross_terms_fy (Box const& box, Array4 const& fy, + Array4 const& vel, + Array4 const& etay, + Array4 const& kapy, + GpuArray const& dxinv) noexcept +{ + const Real dxi = dxinv[0]; + const Real dzi = dxinv[2]; + const auto lo = amrex::lbound(box); + const auto hi = amrex::ubound(box); + constexpr Real twoThirds = Real(2./3.); + + for (int k = lo.z; k <= hi.z; ++k) { + for (int j = lo.y; j <= hi.y; ++j) { + AMREX_PRAGMA_SIMD + for (int i = lo.x; i <= hi.x; ++i) { + Real dudx = mltensor_dx_on_yface(i,j,k,0,vel,dxi); + Real dvdx = mltensor_dx_on_yface(i,j,k,1,vel,dxi); + Real dvdz = mltensor_dz_on_yface(i,j,k,1,vel,dzi); + Real dwdz = mltensor_dz_on_yface(i,j,k,2,vel,dzi); + Real divu = dudx + dwdz; + Real xif = kapy(i,j,k); + Real mun = Real(0.75)*(etay(i,j,k,1)-xif); // restore the original eta + Real mut = etay(i,j,k,0); + fy(i,j,k,0) = -mut*(dvdx); + fy(i,j,k,1) = -mun*(-twoThirds*divu) - xif*divu; + fy(i,j,k,2) = -mut*(dvdz); } - break; } - case 3: { - // xhi & yhi - if (vhi.x == dhi.x && vhi.y == dhi.y) { - for (int k = vlo.z; k <= vhi.z; ++k) { - vel (vhi.x+1,vhi.y+1,k,icomp) - = vel(vhi.x ,vhi.y+1,k,icomp) - + vel(vhi.x+1,vhi.y ,k,icomp) - - vel(vhi.x ,vhi.y ,k,icomp); - } - } else if (vhi.x == dhi.x) { - 
Box bx = amrex::adjCellHi(amrex::adjCellHi(vbox,xdir,1),ydir,1); - int offset = AMREX_SPACEDIM * oxhi; - mllinop_apply_bc_x(Orientation::high, bx, blen.x, - vel, mxhi, bct[offset+icomp], bcl[offset+icomp], - bcvalxhi, maxorder, dxinv[xdir], inhomog, icomp); - } else if (vhi.y == dhi.y) { - Box bx = amrex::adjCellHi(amrex::adjCellHi(vbox,xdir,1),ydir,1); - int offset = AMREX_SPACEDIM * oyhi; - mllinop_apply_bc_y(Orientation::high, bx, blen.y, - vel, myhi, bct[offset+icomp], bcl[offset+icomp], - bcvalyhi, maxorder, dxinv[ydir], inhomog, icomp); - } else { - for (int k = vlo.z; k <= vhi.z; ++k) { - if (mxhi(vhi.x+1,vhi.y+1,k) != BndryData::covered) { - Box bx(IntVect(vhi.x+1,vhi.y+1,k),IntVect(vhi.x+1,vhi.y+1,k)); - if (myhi(vhi.x,vhi.y+1,k) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxhi; - mllinop_apply_bc_x(Orientation::high, bx, blen.x, - vel, mxhi, bct[offset+icomp], bcl[offset+icomp], - bcvalxhi, maxorder, dxinv[xdir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * oyhi; - mllinop_apply_bc_y(Orientation::high, bx, blen.y, - vel, myhi, bct[offset+icomp], bcl[offset+icomp], - bcvalyhi, maxorder, dxinv[ydir], inhomog, icomp); - } - } - } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_cross_terms_fz (Box const& box, Array4 const& fz, + Array4 const& vel, + Array4 const& etaz, + Array4 const& kapz, + GpuArray const& dxinv) noexcept +{ + const Real dxi = dxinv[0]; + const Real dyi = dxinv[1]; + const auto lo = amrex::lbound(box); + const auto hi = amrex::ubound(box); + constexpr Real twoThirds = Real(2./3.); + + for (int k = lo.z; k <= hi.z; ++k) { + for (int j = lo.y; j <= hi.y; ++j) { + AMREX_PRAGMA_SIMD + for (int i = lo.x; i <= hi.x; ++i) { + Real dudx = mltensor_dx_on_zface(i,j,k,0,vel,dxi); + Real dwdx = mltensor_dx_on_zface(i,j,k,2,vel,dxi); + Real dvdy = mltensor_dy_on_zface(i,j,k,1,vel,dyi); + Real dwdy = mltensor_dy_on_zface(i,j,k,2,vel,dyi); + Real divu = dudx + dvdy; + Real xif = kapz(i,j,k); + Real mun = Real(0.75)*(etaz(i,j,k,2)-xif); // restore the original eta + Real mut = etaz(i,j,k,0); + fz(i,j,k,0) = -mut*(dwdx); + fz(i,j,k,1) = -mut*(dwdy); + fz(i,j,k,2) = -mun*(-twoThirds*divu) - xif*divu; } - break; } - case 4: { - // xlo & zlo - if (vlo.x == dlo.x && vlo.z == dlo.z) { - for (int j = vlo.y; j <= vhi.y; ++j) { - vel (vlo.x-1,j,vlo.z-1,icomp) - = vel(vlo.x ,j,vlo.z-1,icomp) - + vel(vlo.x-1,j,vlo.z ,icomp) - - vel(vlo.x ,j,vlo.z ,icomp); - } - } else if (vlo.x == dlo.x) { - Box bx = amrex::adjCellLo(amrex::adjCellLo(vbox,xdir,1),zdir,1); - int offset = AMREX_SPACEDIM * oxlo; - mllinop_apply_bc_x(Orientation::low, bx, blen.x, - vel, mxlo, bct[offset+icomp], bcl[offset+icomp], - bcvalxlo, maxorder, dxinv[xdir], inhomog, icomp); - } else if (vlo.z == dlo.z) { - Box bx = amrex::adjCellLo(amrex::adjCellLo(vbox,xdir,1),zdir,1); - int offset = AMREX_SPACEDIM * ozlo; - mllinop_apply_bc_z(Orientation::low, bx, blen.z, - vel, mzlo, bct[offset+icomp], bcl[offset+icomp], - bcvalzlo, maxorder, dxinv[zdir], inhomog, icomp); + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mltensor_dz_on_xface (int i, int j, int k, int n, Array4 const& vel, Real dzi, + Array4 const& bvxlo, Array4 const& bvxhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept +{ + Real ddz; + if (i == dlo.x) { + if (bct(Orientation::xlo(),n) == AMREX_LO_DIRICHLET && bvxlo) { + if (k == dlo.z) { + ddz = (bvxlo(i-1,j,k ,n) * Real(-1.5) + + bvxlo(i-1,j,k+1,n) * Real(2.) 
+ + bvxlo(i-1,j,k+2,n) * Real(-0.5)) * dzi; + } else if (k == dhi.z) { + ddz = -(bvxlo(i-1,j,k ,n) * Real(-1.5) + + bvxlo(i-1,j,k-1,n) * Real(2.) + + bvxlo(i-1,j,k-2,n) * Real(-0.5)) * dzi; } else { - for (int j = vlo.y; j <= vhi.y; ++j) { - if (mxlo(vlo.x-1,j,vlo.z-1) != BndryData::covered) { - Box bx(IntVect(vlo.x-1,j,vlo.z-1),IntVect(vlo.x-1,j,vlo.z-1)); - if (mzlo(vlo.x,j,vlo.z-1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxlo; - mllinop_apply_bc_x(Orientation::low, bx, blen.x, - vel, mxlo, bct[offset+icomp], bcl[offset+icomp], - bcvalxlo, maxorder, dxinv[xdir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * ozlo; - mllinop_apply_bc_z(Orientation::low, bx, blen.z, - vel, mzlo, bct[offset+icomp], bcl[offset+icomp], - bcvalzlo, maxorder, dxinv[zdir], inhomog, icomp); - } - } - } + ddz = (bvxlo(i-1,j,k+1,n)-bvxlo(i-1,j,k-1,n))*(Real(0.5)*dzi); } - break; + } else if (bct(Orientation::xlo(),n) == AMREX_LO_NEUMANN) { + ddz = (vel(i,j,k+1,n)-vel(i,j,k-1,n))*(Real(0.5)*dzi); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddz = Real(0.); } - case 5: { - // xhi & zlo - if (vhi.x == dhi.x && vlo.z == dlo.z) { - for (int j = vlo.y; j <= vhi.y; ++j) { - vel (vhi.x+1,j,vlo.z-1,icomp) - = vel(vhi.x ,j,vlo.z-1,icomp) - + vel(vhi.x+1,j,vlo.z ,icomp) - - vel(vhi.x ,j,vlo.z ,icomp); - } - } else if (vhi.x == dhi.x) { - Box bx = amrex::adjCellLo(amrex::adjCellHi(vbox,xdir,1),zdir,1); - int offset = AMREX_SPACEDIM * oxhi; - mllinop_apply_bc_x(Orientation::high, bx, blen.x, - vel, mxhi, bct[offset+icomp], bcl[offset+icomp], - bcvalxhi, maxorder, dxinv[xdir], inhomog, icomp); - } else if (vlo.z == dlo.z) { - Box bx = amrex::adjCellLo(amrex::adjCellHi(vbox,xdir,1),zdir,1); - int offset = AMREX_SPACEDIM * ozlo; - mllinop_apply_bc_z(Orientation::low, bx, blen.z, - vel, mzlo, bct[offset+icomp], bcl[offset+icomp], - bcvalzlo, maxorder, dxinv[zdir], inhomog, icomp); + } else if (i == dhi.x+1) { + if (bct(Orientation::xhi(),n) == AMREX_LO_DIRICHLET && bvxhi) { + if (k == dlo.z) { + ddz = (bvxhi(i,j,k ,n) * Real(-1.5) + + bvxhi(i,j,k+1,n) * Real(2.) + + bvxhi(i,j,k+2,n) * Real(-0.5)) * dzi; + } else if (k == dhi.z) { + ddz = -(bvxhi(i,j,k ,n) * Real(-1.5) + + bvxhi(i,j,k-1,n) * Real(2.) 
+ + bvxhi(i,j,k-2,n) * Real(-0.5)) * dzi; } else { - for (int j = vlo.y; j <= vhi.y; ++j) { - if (mxhi(vhi.x+1,j,vlo.z-1) != BndryData::covered) { - Box bx(IntVect(vhi.x+1,j,vlo.z-1),IntVect(vhi.x+1,j,vlo.z-1)); - if (mzlo(vhi.x,j,vlo.z-1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxhi; - mllinop_apply_bc_x(Orientation::high, bx, blen.x, - vel, mxhi, bct[offset+icomp], bcl[offset+icomp], - bcvalxhi, maxorder, dxinv[xdir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * ozlo; - mllinop_apply_bc_z(Orientation::low, bx, blen.z, - vel, mzlo, bct[offset+icomp], bcl[offset+icomp], - bcvalzlo, maxorder, dxinv[zdir], inhomog, icomp); - } - } - } + ddz = (bvxhi(i,j,k+1,n)-bvxhi(i,j,k-1,n))*(Real(0.5)*dzi); } - break; + } else if (bct(Orientation::xhi(),n) == AMREX_LO_NEUMANN) { + ddz = (vel(i-1,j,k+1,n)-vel(i-1,j,k-1,n))*(Real(0.5)*dzi); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddz = Real(0.); } - case 6: { - // xlo & zhi - if (vlo.x == dlo.x && vhi.z == dhi.z) { - for (int j = vlo.y; j <= vhi.y; ++j) { - vel (vlo.x-1,j,vhi.z+1,icomp) - = vel(vlo.x ,j,vhi.z+1,icomp) - + vel(vlo.x-1,j,vhi.z ,icomp) - - vel(vlo.x ,j,vhi.z ,icomp); - } - } else if (vlo.x == dlo.x) { - Box bx = amrex::adjCellHi(amrex::adjCellLo(vbox,xdir,1),zdir,1); - int offset = AMREX_SPACEDIM * oxlo; - mllinop_apply_bc_x(Orientation::low, bx, blen.x, - vel, mxlo, bct[offset+icomp], bcl[offset+icomp], - bcvalxlo, maxorder, dxinv[xdir], inhomog, icomp); - } else if (vhi.z == dhi.z) { - Box bx = amrex::adjCellHi(amrex::adjCellLo(vbox,xdir,1),zdir,1); - int offset = AMREX_SPACEDIM * ozhi; - mllinop_apply_bc_z(Orientation::high, bx, blen.z, - vel, mzhi, bct[offset+icomp], bcl[offset+icomp], - bcvalzhi, maxorder, dxinv[zdir], inhomog, icomp); + } else { + ddz = mltensor_dz_on_xface(i,j,k,n,vel,dzi); + } + return ddz; +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mltensor_dz_on_yface (int i, int j, int k, int n, Array4 const& vel, Real dzi, + Array4 const& bvylo, Array4 const& bvyhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept +{ + Real ddz; + if (j == dlo.y) { + if (bct(Orientation::ylo(),n) == AMREX_LO_DIRICHLET && bvylo) { + if (k == dlo.z) { + ddz = (bvylo(i,j-1,k ,n) * Real(-1.5) + + bvylo(i,j-1,k+1,n) * Real(2.) + + bvylo(i,j-1,k+2,n) * Real(-0.5)) * dzi; + } else if (k == dhi.z) { + ddz = -(bvylo(i,j-1,k ,n) * Real(-1.5) + + bvylo(i,j-1,k-1,n) * Real(2.) 
+ + bvylo(i,j-1,k-2,n) * Real(-0.5)) * dzi; } else { - for (int j = vlo.y; j <= vhi.y; ++j) { - if (mxlo(vlo.x-1,j,vhi.z+1) != BndryData::covered) { - Box bx(IntVect(vlo.x-1,j,vhi.z+1),IntVect(vlo.x-1,j,vhi.z+1)); - if (mzhi(vlo.x,j,vhi.z+1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxlo; - mllinop_apply_bc_x(Orientation::low, bx, blen.x, - vel, mxlo, bct[offset+icomp], bcl[offset+icomp], - bcvalxlo, maxorder, dxinv[xdir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * ozhi; - mllinop_apply_bc_z(Orientation::high, bx, blen.z, - vel, mzhi, bct[offset+icomp], bcl[offset+icomp], - bcvalzhi, maxorder, dxinv[zdir], inhomog, icomp); - } - } - } + ddz = (bvylo(i,j-1,k+1,n)-bvylo(i,j-1,k-1,n))*(Real(0.5)*dzi); } - break; + } else if (bct(Orientation::ylo(),n) == AMREX_LO_NEUMANN) { + ddz = (vel(i,j,k+1,n)-vel(i,j,k-1,n))*(Real(0.5)*dzi); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddz = Real(0.); } - case 7: { - // xhi & zhi - if (vhi.x == dhi.x && vhi.z == dhi.z) { - for (int j = vlo.y; j <= vhi.y; ++j) { - vel (vhi.x+1,j,vhi.z+1,icomp) - = vel(vhi.x ,j,vhi.z+1,icomp) - + vel(vhi.x+1,j,vhi.z ,icomp) - - vel(vhi.x ,j,vhi.z ,icomp); - } - } else if (vhi.x == dhi.x) { - Box bx = amrex::adjCellHi(amrex::adjCellHi(vbox,xdir,1),zdir,1); - int offset = AMREX_SPACEDIM * oxhi; - mllinop_apply_bc_x(Orientation::high, bx, blen.x, - vel, mxhi, bct[offset+icomp], bcl[offset+icomp], - bcvalxhi, maxorder, dxinv[xdir], inhomog, icomp); - } else if (vhi.z == dhi.z) { - Box bx = amrex::adjCellHi(amrex::adjCellHi(vbox,xdir,1),zdir,1); - int offset = AMREX_SPACEDIM * ozhi; - mllinop_apply_bc_z(Orientation::high, bx, blen.z, - vel, mzhi, bct[offset+icomp], bcl[offset+icomp], - bcvalzhi, maxorder, dxinv[zdir], inhomog, icomp); + } else if (j == dhi.y+1) { + if (bct(Orientation::yhi(),n) == AMREX_LO_DIRICHLET && bvyhi) { + if (k == dlo.z) { + ddz = (bvyhi(i,j,k ,n) * Real(-1.5) + + bvyhi(i,j,k+1,n) * Real(2.) + + bvyhi(i,j,k+2,n) * Real(-0.5)) * dzi; + } else if (k == dhi.z) { + ddz = -(bvyhi(i,j,k ,n) * Real(-1.5) + + bvyhi(i,j,k-1,n) * Real(2.) 
+ + bvyhi(i,j,k-2,n) * Real(-0.5)) * dzi; } else { - for (int j = vlo.y; j <= vhi.y; ++j) { - if (mxhi(vhi.x+1,j,vhi.z+1) != BndryData::covered) { - Box bx(IntVect(vhi.x+1,j,vhi.z+1),IntVect(vhi.x+1,j,vhi.z+1)); - if (mzhi(vhi.x,j,vhi.z+1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxhi; - mllinop_apply_bc_x(Orientation::high, bx, blen.x, - vel, mxhi, bct[offset+icomp], bcl[offset+icomp], - bcvalxhi, maxorder, dxinv[xdir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * ozhi; - mllinop_apply_bc_z(Orientation::high, bx, blen.z, - vel, mzhi, bct[offset+icomp], bcl[offset+icomp], - bcvalzhi, maxorder, dxinv[zdir], inhomog, icomp); - } - } - } + ddz = (bvyhi(i,j,k+1,n)-bvyhi(i,j,k-1,n))*(Real(0.5)*dzi); } - break; + } else if (bct(Orientation::yhi(),n) == AMREX_LO_NEUMANN) { + ddz = (vel(i,j-1,k+1,n)-vel(i,j-1,k-1,n))*(Real(0.5)*dzi); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddz = Real(0.); } - case 8: { - // ylo & zlo - if (vlo.y == dlo.y && vlo.z == dlo.z) { - for (int i = vlo.x; i <= vhi.x; ++i) { - vel (i,vlo.y-1,vlo.z-1,icomp) - = vel(i,vlo.y ,vlo.z-1,icomp) - + vel(i,vlo.y-1,vlo.z ,icomp) - - vel(i,vlo.y ,vlo.z ,icomp); - } - } else if (vlo.y == dlo.y) { - Box bx = amrex::adjCellLo(amrex::adjCellLo(vbox,ydir,1),zdir,1); - int offset = AMREX_SPACEDIM * oylo; - mllinop_apply_bc_y(Orientation::low, bx, blen.y, - vel, mylo, bct[offset+icomp], bcl[offset+icomp], - bcvalylo, maxorder, dxinv[ydir], inhomog, icomp); - } else if (vlo.z == dlo.z) { - Box bx = amrex::adjCellLo(amrex::adjCellLo(vbox,ydir,1),zdir,1); - int offset = AMREX_SPACEDIM * ozlo; - mllinop_apply_bc_z(Orientation::low, bx, blen.z, - vel, mzlo, bct[offset+icomp], bcl[offset+icomp], - bcvalzlo, maxorder, dxinv[zdir], inhomog, icomp); + } else { + ddz = mltensor_dz_on_yface(i,j,k,n,vel,dzi); + } + return ddz; +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mltensor_dx_on_zface (int i, int j, int k, int n, Array4 const& vel, Real dxi, + Array4 const& bvzlo, Array4 const& bvzhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept +{ + Real ddx; + if (k == dlo.z) { + if (bct(Orientation::zlo(),n) == AMREX_LO_DIRICHLET && bvzlo) { + if (i == dlo.x) { + ddx = (bvzlo(i ,j,k-1,n) * Real(-1.5) + + bvzlo(i+1,j,k-1,n) * Real(2.) + + bvzlo(i+2,j,k-1,n) * Real(-0.5)) * dxi; + } else if (i == dhi.x) { + ddx = -(bvzlo(i ,j,k-1,n) * Real(-1.5) + + bvzlo(i-1,j,k-1,n) * Real(2.) 
+ + bvzlo(i-2,j,k-1,n) * Real(-0.5)) * dxi; } else { - for (int i = vlo.x; i <= vhi.x; ++i) { - if (mylo(i,vlo.y-1,vlo.z-1) != BndryData::covered) { - Box bx(IntVect(i,vlo.y-1,vlo.z-1),IntVect(i,vlo.y-1,vlo.z-1)); - if (mzlo(i,vlo.y,vlo.z-1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oylo; - mllinop_apply_bc_y(Orientation::low, bx, blen.y, - vel, mylo, bct[offset+icomp], bcl[offset+icomp], - bcvalylo, maxorder, dxinv[ydir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * ozlo; - mllinop_apply_bc_z(Orientation::low, bx, blen.z, - vel, mzlo, bct[offset+icomp], bcl[offset+icomp], - bcvalzlo, maxorder, dxinv[zdir], inhomog, icomp); - } - } - } + ddx = (bvzlo(i+1,j,k-1,n)-bvzlo(i-1,j,k-1,n))*(Real(0.5)*dxi); } - break; + } else if (bct(Orientation::zlo(),n) == AMREX_LO_NEUMANN) { + ddx = (vel(i+1,j,k,n)-vel(i-1,j,k,n))*(Real(0.5)*dxi); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddx = Real(0.); } - case 9: { - // yhi & zlo - if (vhi.y == dhi.y && vlo.z == dlo.z) { - for (int i = vlo.x; i <= vhi.x; ++i) { - vel (i,vhi.y+1,vlo.z-1,icomp) - = vel(i,vhi.y ,vlo.z-1,icomp) - + vel(i,vhi.y+1,vlo.z ,icomp) - - vel(i,vhi.y ,vlo.z ,icomp); - } - } else if (vhi.y == dhi.y) { - Box bx = amrex::adjCellLo(amrex::adjCellHi(vbox,ydir,1),zdir,1); - int offset = AMREX_SPACEDIM * oyhi; - mllinop_apply_bc_y(Orientation::high, bx, blen.y, - vel, myhi, bct[offset+icomp], bcl[offset+icomp], - bcvalyhi, maxorder, dxinv[ydir], inhomog, icomp); - } else if (vlo.z == dlo.z) { - Box bx = amrex::adjCellLo(amrex::adjCellHi(vbox,ydir,1),zdir,1); - int offset = AMREX_SPACEDIM * ozlo; - mllinop_apply_bc_z(Orientation::low, bx, blen.z, - vel, mzlo, bct[offset+icomp], bcl[offset+icomp], - bcvalzlo, maxorder, dxinv[zdir], inhomog, icomp); + } else if (k == dhi.z+1) { + if (bct(Orientation::zhi(),n) == AMREX_LO_DIRICHLET && bvzhi) { + if (i == dlo.x) { + ddx = (bvzhi(i ,j,k,n) * Real(-1.5) + + bvzhi(i+1,j,k,n) * Real(2.) + + bvzhi(i+2,j,k,n) * Real(-0.5)) * dxi; + } else if (i == dhi.x) { + ddx = -(bvzhi(i ,j,k,n) * Real(-1.5) + + bvzhi(i-1,j,k,n) * Real(2.) 
+ + bvzhi(i-2,j,k,n) * Real(-0.5)) * dxi; } else { - for (int i = vlo.x; i <= vhi.x; ++i) { - if (myhi(i,vhi.y+1,vlo.z-1) != BndryData::covered) { - Box bx(IntVect(i,vhi.y+1,vlo.z-1),IntVect(i,vhi.y+1,vlo.z-1)); - if (mzlo(i,vhi.y,vlo.z-1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oyhi; - mllinop_apply_bc_y(Orientation::high, bx, blen.y, - vel, myhi, bct[offset+icomp], bcl[offset+icomp], - bcvalyhi, maxorder, dxinv[ydir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * ozlo; - mllinop_apply_bc_z(Orientation::low, bx, blen.z, - vel, mzlo, bct[offset+icomp], bcl[offset+icomp], - bcvalzlo, maxorder, dxinv[zdir], inhomog, icomp); - } - } - } + ddx = (bvzhi(i+1,j,k,n)-bvzhi(i-1,j,k,n))*(Real(0.5)*dxi); } - break; + } else if (bct(Orientation::zhi(),n) == AMREX_LO_NEUMANN) { + ddx = (vel(i+1,j,k-1,n)-vel(i-1,j,k-1,n))*(Real(0.5)*dxi); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddx = Real(0.); } - case 10: { - // ylo & zhi - if (vlo.y == dlo.y && vhi.z == dhi.z) { - for (int i = vlo.x; i <= vhi.x; ++i) { - vel (i,vlo.y-1,vhi.z+1,icomp) - = vel(i,vlo.y ,vhi.z+1,icomp) - + vel(i,vlo.y-1,vhi.z ,icomp) - - vel(i,vlo.y ,vhi.z ,icomp); - } - } else if (vlo.y == dlo.y) { - Box bx = amrex::adjCellHi(amrex::adjCellLo(vbox,ydir,1),zdir,1); - int offset = AMREX_SPACEDIM * oylo; - mllinop_apply_bc_y(Orientation::low, bx, blen.y, - vel, mylo, bct[offset+icomp], bcl[offset+icomp], - bcvalylo, maxorder, dxinv[ydir], inhomog, icomp); - } else if (vhi.z == dhi.z) { - Box bx = amrex::adjCellHi(amrex::adjCellLo(vbox,ydir,1),zdir,1); - int offset = AMREX_SPACEDIM * ozhi; - mllinop_apply_bc_z(Orientation::high, bx, blen.z, - vel, mzhi, bct[offset+icomp], bcl[offset+icomp], - bcvalzhi, maxorder, dxinv[zdir], inhomog, icomp); + } else { + ddx = mltensor_dx_on_zface(i,j,k,n,vel,dxi); + } + return ddx; +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mltensor_dy_on_zface (int i, int j, int k, int n, Array4 const& vel, Real dyi, + Array4 const& bvzlo, Array4 const& bvzhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept +{ + Real ddy; + if (k == dlo.z) { + if (bct(Orientation::zlo(),n) == AMREX_LO_DIRICHLET && bvzlo) { + if (j == dlo.y) { + ddy = (bvzlo(i,j ,k-1,n) * Real(-1.5) + + bvzlo(i,j+1,k-1,n) * Real(2.) + + bvzlo(i,j+2,k-1,n) * Real(-0.5)) * dyi; + } else if (j == dhi.y) { + ddy = -(bvzlo(i,j ,k-1,n) * Real(-1.5) + + bvzlo(i,j-1,k-1,n) * Real(2.) 
+ + bvzlo(i,j-2,k-1,n) * Real(-0.5)) * dyi; } else { - for (int i = vlo.x; i <= vhi.x; ++i) { - if (mylo(i,vlo.y-1,vhi.z+1) != BndryData::covered) { - Box bx(IntVect(i,vlo.y-1,vhi.z+1),IntVect(i,vlo.y-1,vhi.z+1)); - if (mzhi(i,vlo.y,vhi.z+1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oylo; - mllinop_apply_bc_y(Orientation::low, bx, blen.y, - vel, mylo, bct[offset+icomp], bcl[offset+icomp], - bcvalylo, maxorder, dxinv[ydir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * ozhi; - mllinop_apply_bc_z(Orientation::high, bx, blen.z, - vel, mzhi, bct[offset+icomp], bcl[offset+icomp], - bcvalzhi, maxorder, dxinv[zdir], inhomog, icomp); - } - } - } + ddy = (bvzlo(i,j+1,k-1,n)-bvzlo(i,j-1,k-1,n))*(Real(0.5)*dyi); } - break; + } else if (bct(Orientation::zlo(),n) == AMREX_LO_NEUMANN) { + ddy = (vel(i,j+1,k,n)-vel(i,j-1,k,n))*(Real(0.5)*dyi); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddy = Real(0.); } - case 11: { - // yhi & zhi - if (vhi.y == dhi.y && vhi.z == dhi.z) { - for (int i = vlo.x; i <= vhi.x; ++i) { - vel (i,vhi.y+1,vhi.z+1,icomp) - = vel(i,vhi.y ,vhi.z+1,icomp) - + vel(i,vhi.y+1,vhi.z ,icomp) - - vel(i,vhi.y ,vhi.z ,icomp); - } - } else if (vhi.y == dhi.y) { - Box bx = amrex::adjCellHi(amrex::adjCellHi(vbox,ydir,1),zdir,1); - int offset = AMREX_SPACEDIM * oyhi; - mllinop_apply_bc_y(Orientation::high, bx, blen.y, - vel, myhi, bct[offset+icomp], bcl[offset+icomp], - bcvalyhi, maxorder, dxinv[ydir], inhomog, icomp); - } else if (vhi.z == dhi.z) { - Box bx = amrex::adjCellHi(amrex::adjCellHi(vbox,ydir,1),zdir,1); - int offset = AMREX_SPACEDIM * ozhi; - mllinop_apply_bc_z(Orientation::high, bx, blen.z, - vel, mzhi, bct[offset+icomp], bcl[offset+icomp], - bcvalzhi, maxorder, dxinv[zdir], inhomog, icomp); + } else if (k == dhi.z+1) { + if (bct(Orientation::zhi(),n) == AMREX_LO_DIRICHLET && bvzhi) { + if (j == dlo.y) { + ddy = (bvzhi(i,j ,k,n) * Real(-1.5) + + bvzhi(i,j+1,k,n) * Real(2.) + + bvzhi(i,j+2,k,n) * Real(-0.5)) * dyi; + } else if (j == dhi.y) { + ddy = -(bvzhi(i,j ,k,n) * Real(-1.5) + + bvzhi(i,j-1,k,n) * Real(2.) 
+ + bvzhi(i,j-2,k,n) * Real(-0.5)) * dyi; } else { - for (int i = vlo.x; i <= vhi.x; ++i) { - if (myhi(i,vhi.y+1,vhi.z+1) != BndryData::covered) { - Box bx(IntVect(i,vhi.y+1,vhi.z+1),IntVect(i,vhi.y+1,vhi.z+1)); - if (mzhi(i,vhi.y,vhi.z+1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oyhi; - mllinop_apply_bc_y(Orientation::high, bx, blen.y, - vel, myhi, bct[offset+icomp], bcl[offset+icomp], - bcvalyhi, maxorder, dxinv[ydir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * ozhi; - mllinop_apply_bc_z(Orientation::high, bx, blen.z, - vel, mzhi, bct[offset+icomp], bcl[offset+icomp], - bcvalzhi, maxorder, dxinv[zdir], inhomog, icomp); - } - } - } + ddy = (bvzhi(i,j+1,k,n)-bvzhi(i,j-1,k,n))*(Real(0.5)*dyi); } - break; - } - default: {} + } else if (bct(Orientation::zhi(),n) == AMREX_LO_NEUMANN) { + ddy = (vel(i,j+1,k-1,n)-vel(i,j-1,k-1,n))*(Real(0.5)*dyi); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddy = Real(0.); } + } else { + ddy = mltensor_dy_on_zface(i,j,k,n,vel,dyi); } + return ddy; } AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE @@ -1057,7 +1844,13 @@ void mltensor_cross_terms_fx (Box const& box, Array4 const& fx, Array4 const& vel, Array4 const& etax, Array4 const& kapx, - GpuArray const& dxinv) noexcept + GpuArray const& dxinv, + Array4 const& bvxlo, + Array4 const& bvxhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept { const Real dyi = dxinv[1]; const Real dzi = dxinv[2]; @@ -1067,12 +1860,11 @@ void mltensor_cross_terms_fx (Box const& box, Array4 const& fx, for (int k = lo.z; k <= hi.z; ++k) { for (int j = lo.y; j <= hi.y; ++j) { - AMREX_PRAGMA_SIMD for (int i = lo.x; i <= hi.x; ++i) { - Real dudy = (vel(i,j+1,k,0)+vel(i-1,j+1,k,0)-vel(i,j-1,k,0)-vel(i-1,j-1,k,0))*(Real(0.25)*dyi); - Real dvdy = (vel(i,j+1,k,1)+vel(i-1,j+1,k,1)-vel(i,j-1,k,1)-vel(i-1,j-1,k,1))*(Real(0.25)*dyi); - Real dudz = (vel(i,j,k+1,0)+vel(i-1,j,k+1,0)-vel(i,j,k-1,0)-vel(i-1,j,k-1,0))*(Real(0.25)*dzi); - Real dwdz = (vel(i,j,k+1,2)+vel(i-1,j,k+1,2)-vel(i,j,k-1,2)-vel(i-1,j,k-1,2))*(Real(0.25)*dzi); + Real dudy = mltensor_dy_on_xface(i,j,k,0,vel,dyi,bvxlo,bvxhi,bct,dlo,dhi); + Real dvdy = mltensor_dy_on_xface(i,j,k,1,vel,dyi,bvxlo,bvxhi,bct,dlo,dhi); + Real dudz = mltensor_dz_on_xface(i,j,k,0,vel,dzi,bvxlo,bvxhi,bct,dlo,dhi); + Real dwdz = mltensor_dz_on_xface(i,j,k,2,vel,dzi,bvxlo,bvxhi,bct,dlo,dhi); Real divu = dvdy + dwdz; Real xif = kapx(i,j,k); Real mun = Real(0.75)*(etax(i,j,k,0)-xif); // restore the original eta @@ -1090,7 +1882,13 @@ void mltensor_cross_terms_fy (Box const& box, Array4 const& fy, Array4 const& vel, Array4 const& etay, Array4 const& kapy, - GpuArray const& dxinv) noexcept + GpuArray const& dxinv, + Array4 const& bvylo, + Array4 const& bvyhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept { const Real dxi = dxinv[0]; const Real dzi = dxinv[2]; @@ -1100,12 +1898,11 @@ void mltensor_cross_terms_fy (Box const& box, Array4 const& fy, for (int k = lo.z; k <= hi.z; ++k) { for (int j = lo.y; j <= hi.y; ++j) { - AMREX_PRAGMA_SIMD for (int i = lo.x; i <= hi.x; ++i) { - Real dudx = (vel(i+1,j,k,0)+vel(i+1,j-1,k,0)-vel(i-1,j,k,0)-vel(i-1,j-1,k,0))*(Real(0.25)*dxi); - Real dvdx = (vel(i+1,j,k,1)+vel(i+1,j-1,k,1)-vel(i-1,j,k,1)-vel(i-1,j-1,k,1))*(Real(0.25)*dxi); - Real dvdz = (vel(i,j,k+1,1)+vel(i,j-1,k+1,1)-vel(i,j,k-1,1)-vel(i,j-1,k-1,1))*(Real(0.25)*dzi); - Real dwdz = (vel(i,j,k+1,2)+vel(i,j-1,k+1,2)-vel(i,j,k-1,2)-vel(i,j-1,k-1,2))*(Real(0.25)*dzi); + Real dudx = 
mltensor_dx_on_yface(i,j,k,0,vel,dxi,bvylo,bvyhi,bct,dlo,dhi); + Real dvdx = mltensor_dx_on_yface(i,j,k,1,vel,dxi,bvylo,bvyhi,bct,dlo,dhi); + Real dvdz = mltensor_dz_on_yface(i,j,k,1,vel,dzi,bvylo,bvyhi,bct,dlo,dhi); + Real dwdz = mltensor_dz_on_yface(i,j,k,2,vel,dzi,bvylo,bvyhi,bct,dlo,dhi); Real divu = dudx + dwdz; Real xif = kapy(i,j,k); Real mun = Real(0.75)*(etay(i,j,k,1)-xif); // restore the original eta @@ -1123,7 +1920,13 @@ void mltensor_cross_terms_fz (Box const& box, Array4 const& fz, Array4 const& vel, Array4 const& etaz, Array4 const& kapz, - GpuArray const& dxinv) noexcept + GpuArray const& dxinv, + Array4 const& bvzlo, + Array4 const& bvzhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept { const Real dxi = dxinv[0]; const Real dyi = dxinv[1]; @@ -1133,12 +1936,11 @@ void mltensor_cross_terms_fz (Box const& box, Array4 const& fz, for (int k = lo.z; k <= hi.z; ++k) { for (int j = lo.y; j <= hi.y; ++j) { - AMREX_PRAGMA_SIMD for (int i = lo.x; i <= hi.x; ++i) { - Real dudx = (vel(i+1,j,k,0)+vel(i+1,j,k-1,0)-vel(i-1,j,k,0)-vel(i-1,j,k-1,0))*(Real(0.25)*dxi); - Real dwdx = (vel(i+1,j,k,2)+vel(i+1,j,k-1,2)-vel(i-1,j,k,2)-vel(i-1,j,k-1,2))*(Real(0.25)*dxi); - Real dvdy = (vel(i,j+1,k,1)+vel(i,j+1,k-1,1)-vel(i,j-1,k,1)-vel(i,j-1,k-1,1))*(Real(0.25)*dyi); - Real dwdy = (vel(i,j+1,k,2)+vel(i,j+1,k-1,2)-vel(i,j-1,k,2)-vel(i,j-1,k-1,2))*(Real(0.25)*dyi); + Real dudx = mltensor_dx_on_zface(i,j,k,0,vel,dxi,bvzlo,bvzhi,bct,dlo,dhi); + Real dwdx = mltensor_dx_on_zface(i,j,k,2,vel,dxi,bvzlo,bvzhi,bct,dlo,dhi); + Real dvdy = mltensor_dy_on_zface(i,j,k,1,vel,dyi,bvzlo,bvzhi,bct,dlo,dhi); + Real dwdy = mltensor_dy_on_zface(i,j,k,2,vel,dyi,bvzlo,bvzhi,bct,dlo,dhi); Real divu = dudx + dvdy; Real xif = kapz(i,j,k); Real mun = Real(0.75)*(etaz(i,j,k,2)-xif); // restore the original eta @@ -1242,13 +2044,13 @@ void mltensor_vel_grads_fx (Box const& box, Array4 const& fx, Real dvdx = (vel(i,j,k,1) - vel(i-1,j,k,1))*dxi; Real dwdx = (vel(i,j,k,2) - vel(i-1,j,k,2))*dxi; - Real dudy = (vel(i,j+1,k,0)+vel(i-1,j+1,k,0)-vel(i,j-1,k,0)-vel(i-1,j-1,k,0))*(Real(0.25)*dyi); - Real dvdy = (vel(i,j+1,k,1)+vel(i-1,j+1,k,1)-vel(i,j-1,k,1)-vel(i-1,j-1,k,1))*(Real(0.25)*dyi); - Real dwdy = (vel(i,j+1,k,2)+vel(i-1,j+1,k,2)-vel(i,j-1,k,2)-vel(i-1,j-1,k,2))*(Real(0.25)*dyi); + Real dudy = mltensor_dy_on_xface(i,j,k,0,vel,dyi); + Real dvdy = mltensor_dy_on_xface(i,j,k,1,vel,dyi); + Real dwdy = mltensor_dy_on_xface(i,j,k,2,vel,dyi); - Real dudz = (vel(i,j,k+1,0)+vel(i-1,j,k+1,0)-vel(i,j,k-1,0)-vel(i-1,j,k-1,0))*(Real(0.25)*dzi); - Real dvdz = (vel(i,j,k+1,1)+vel(i-1,j,k+1,1)-vel(i,j,k-1,1)-vel(i-1,j,k-1,1))*(Real(0.25)*dzi); - Real dwdz = (vel(i,j,k+1,2)+vel(i-1,j,k+1,2)-vel(i,j,k-1,2)-vel(i-1,j,k-1,2))*(Real(0.25)*dzi); + Real dudz = mltensor_dz_on_xface(i,j,k,0,vel,dzi); + Real dvdz = mltensor_dz_on_xface(i,j,k,1,vel,dzi); + Real dwdz = mltensor_dz_on_xface(i,j,k,2,vel,dzi); fx(i,j,k,0) = dudx; fx(i,j,k,1) = dvdx; @@ -1281,17 +2083,17 @@ void mltensor_vel_grads_fy (Box const& box, Array4 const& fy, AMREX_PRAGMA_SIMD for (int i = lo.x; i <= hi.x; ++i) { - Real dudx = (vel(i+1,j,k,0)+vel(i+1,j-1,k,0)-vel(i-1,j,k,0)-vel(i-1,j-1,k,0))*(Real(0.25)*dxi); - Real dvdx = (vel(i+1,j,k,1)+vel(i+1,j-1,k,1)-vel(i-1,j,k,1)-vel(i-1,j-1,k,1))*(Real(0.25)*dxi); - Real dwdx = (vel(i+1,j,k,2)+vel(i+1,j-1,k,2)-vel(i-1,j,k,2)-vel(i-1,j-1,k,2))*(Real(0.25)*dxi); + Real dudx = mltensor_dx_on_yface(i,j,k,0,vel,dxi); + Real dvdx = mltensor_dx_on_yface(i,j,k,1,vel,dxi); + Real dwdx = mltensor_dx_on_yface(i,j,k,2,vel,dxi); 
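+                    // These helpers (defined in AMReX_MLTensor_K.H) reproduce the inline
+                    // stencils they replace: a centered difference in the transverse
+                    // direction averaged over the two cells sharing the face, e.g. on a
+                    // y-face dudx = (vel(i+1,j,k,0)+vel(i+1,j-1,k,0)
+                    //               -vel(i-1,j,k,0)-vel(i-1,j-1,k,0))*(0.25*dxi).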
Real dudy = (vel(i,j,k,0) - vel(i,j-1,k,0))*dyi; Real dvdy = (vel(i,j,k,1) - vel(i,j-1,k,1))*dyi; Real dwdy = (vel(i,j,k,2) - vel(i,j-1,k,2))*dyi; - Real dudz = (vel(i,j,k+1,0)+vel(i,j-1,k+1,0)-vel(i,j,k-1,0)-vel(i,j-1,k-1,0))*(Real(0.25)*dzi); - Real dvdz = (vel(i,j,k+1,1)+vel(i,j-1,k+1,1)-vel(i,j,k-1,1)-vel(i,j-1,k-1,1))*(Real(0.25)*dzi); - Real dwdz = (vel(i,j,k+1,2)+vel(i,j-1,k+1,2)-vel(i,j,k-1,2)-vel(i,j-1,k-1,2))*(Real(0.25)*dzi); + Real dudz = mltensor_dz_on_yface(i,j,k,0,vel,dzi); + Real dvdz = mltensor_dz_on_yface(i,j,k,1,vel,dzi); + Real dwdz = mltensor_dz_on_yface(i,j,k,2,vel,dzi); fy(i,j,k,0) = dudx; fy(i,j,k,1) = dvdx; @@ -1324,13 +2126,13 @@ void mltensor_vel_grads_fz (Box const& box, Array4 const& fz, AMREX_PRAGMA_SIMD for (int i = lo.x; i <= hi.x; ++i) { - Real dudx = (vel(i+1,j,k,0)+vel(i+1,j,k-1,0)-vel(i-1,j,k,0)-vel(i-1,j,k-1,0))*(Real(0.25)*dxi); - Real dvdx = (vel(i+1,j,k,1)+vel(i+1,j,k-1,1)-vel(i-1,j,k,1)-vel(i-1,j,k-1,1))*(Real(0.25)*dxi); - Real dwdx = (vel(i+1,j,k,2)+vel(i+1,j,k-1,2)-vel(i-1,j,k,2)-vel(i-1,j,k-1,2))*(Real(0.25)*dxi); + Real dudx = mltensor_dx_on_zface(i,j,k,0,vel,dxi); + Real dvdx = mltensor_dx_on_zface(i,j,k,1,vel,dxi); + Real dwdx = mltensor_dx_on_zface(i,j,k,2,vel,dxi); - Real dudy = (vel(i,j+1,k,0)+vel(i,j+1,k-1,0)-vel(i,j-1,k,0)-vel(i,j-1,k-1,0))*(Real(0.25)*dyi); - Real dvdy = (vel(i,j+1,k,1)+vel(i,j+1,k-1,1)-vel(i,j-1,k,1)-vel(i,j-1,k-1,1))*(Real(0.25)*dyi); - Real dwdy = (vel(i,j+1,k,2)+vel(i,j+1,k-1,2)-vel(i,j-1,k,2)-vel(i,j-1,k-1,2))*(Real(0.25)*dyi); + Real dudy = mltensor_dy_on_zface(i,j,k,0,vel,dyi); + Real dvdy = mltensor_dy_on_zface(i,j,k,1,vel,dyi); + Real dwdy = mltensor_dy_on_zface(i,j,k,2,vel,dyi); Real dudz = (vel(i,j,k,0) - vel(i,j,k-1,0))*dzi; Real dvdz = (vel(i,j,k,1) - vel(i,j,k-1,1))*dzi; @@ -1351,6 +2153,138 @@ void mltensor_vel_grads_fz (Box const& box, Array4 const& fz, } } +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_vel_grads_fx (Box const& box, Array4 const& fx, + Array4 const& vel, + GpuArray const& dxinv, + Array4 const& bvxlo, + Array4 const& bvxhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept +{ + const Real dxi = dxinv[0]; + const Real dyi = dxinv[1]; + const Real dzi = dxinv[2]; + const auto lo = amrex::lbound(box); + const auto hi = amrex::ubound(box); + + for (int k = lo.z; k <= hi.z; ++k) { + for (int j = lo.y; j <= hi.y; ++j) { + for (int i = lo.x; i <= hi.x; ++i) { + Real dudx = (vel(i,j,k,0) - vel(i-1,j,k,0))*dxi; + Real dvdx = (vel(i,j,k,1) - vel(i-1,j,k,1))*dxi; + Real dwdx = (vel(i,j,k,2) - vel(i-1,j,k,2))*dxi; + Real dudy = mltensor_dy_on_xface(i,j,k,0,vel,dyi,bvxlo,bvxhi,bct,dlo,dhi); + Real dvdy = mltensor_dy_on_xface(i,j,k,1,vel,dyi,bvxlo,bvxhi,bct,dlo,dhi); + Real dwdy = mltensor_dy_on_xface(i,j,k,2,vel,dyi,bvxlo,bvxhi,bct,dlo,dhi); + Real dudz = mltensor_dz_on_xface(i,j,k,0,vel,dzi,bvxlo,bvxhi,bct,dlo,dhi); + Real dvdz = mltensor_dz_on_xface(i,j,k,1,vel,dzi,bvxlo,bvxhi,bct,dlo,dhi); + Real dwdz = mltensor_dz_on_xface(i,j,k,2,vel,dzi,bvxlo,bvxhi,bct,dlo,dhi); + fx(i,j,k,0) = dudx; + fx(i,j,k,1) = dvdx; + fx(i,j,k,2) = dwdx; + fx(i,j,k,3) = dudy; + fx(i,j,k,4) = dvdy; + fx(i,j,k,5) = dwdy; + fx(i,j,k,6) = dudz; + fx(i,j,k,7) = dvdz; + fx(i,j,k,8) = dwdz; + + } + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_vel_grads_fy (Box const& box, Array4 const& fy, + Array4 const& vel, + GpuArray const& dxinv, + Array4 const& bvylo, + Array4 const& bvyhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept +{ + const Real dxi = dxinv[0]; + 
const Real dyi = dxinv[1]; + const Real dzi = dxinv[2]; + const auto lo = amrex::lbound(box); + const auto hi = amrex::ubound(box); + + for (int k = lo.z; k <= hi.z; ++k) { + for (int j = lo.y; j <= hi.y; ++j) { + for (int i = lo.x; i <= hi.x; ++i) { + Real dudx = mltensor_dx_on_yface(i,j,k,0,vel,dxi,bvylo,bvyhi,bct,dlo,dhi); + Real dvdx = mltensor_dx_on_yface(i,j,k,1,vel,dxi,bvylo,bvyhi,bct,dlo,dhi); + Real dwdx = mltensor_dx_on_yface(i,j,k,2,vel,dxi,bvylo,bvyhi,bct,dlo,dhi); + Real dudy = (vel(i,j,k,0) - vel(i,j-1,k,0))*dyi; + Real dvdy = (vel(i,j,k,1) - vel(i,j-1,k,1))*dyi; + Real dwdy = (vel(i,j,k,2) - vel(i,j-1,k,2))*dyi; + Real dudz = mltensor_dz_on_yface(i,j,k,0,vel,dzi,bvylo,bvyhi,bct,dlo,dhi); + Real dvdz = mltensor_dz_on_yface(i,j,k,1,vel,dzi,bvylo,bvyhi,bct,dlo,dhi); + Real dwdz = mltensor_dz_on_yface(i,j,k,2,vel,dzi,bvylo,bvyhi,bct,dlo,dhi); + fy(i,j,k,0) = dudx; + fy(i,j,k,1) = dvdx; + fy(i,j,k,2) = dwdx; + fy(i,j,k,3) = dudy; + fy(i,j,k,4) = dvdy; + fy(i,j,k,5) = dwdy; + fy(i,j,k,6) = dudz; + fy(i,j,k,7) = dvdz; + fy(i,j,k,8) = dwdz; + + } + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_vel_grads_fz (Box const& box, Array4 const& fz, + Array4 const& vel, + GpuArray const& dxinv, + Array4 const& bvzlo, + Array4 const& bvzhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept +{ + const Real dxi = dxinv[0]; + const Real dyi = dxinv[1]; + const Real dzi = dxinv[2]; + const auto lo = amrex::lbound(box); + const auto hi = amrex::ubound(box); + + for (int k = lo.z; k <= hi.z; ++k) { + for (int j = lo.y; j <= hi.y; ++j) { + for (int i = lo.x; i <= hi.x; ++i) { + Real dudx = mltensor_dx_on_zface(i,j,k,0,vel,dxi,bvzlo,bvzhi,bct,dlo,dhi); + Real dvdx = mltensor_dx_on_zface(i,j,k,1,vel,dxi,bvzlo,bvzhi,bct,dlo,dhi); + Real dwdx = mltensor_dx_on_zface(i,j,k,2,vel,dxi,bvzlo,bvzhi,bct,dlo,dhi); + Real dudy = mltensor_dy_on_zface(i,j,k,0,vel,dyi,bvzlo,bvzhi,bct,dlo,dhi); + Real dvdy = mltensor_dy_on_zface(i,j,k,1,vel,dyi,bvzlo,bvzhi,bct,dlo,dhi); + Real dwdy = mltensor_dy_on_zface(i,j,k,2,vel,dyi,bvzlo,bvzhi,bct,dlo,dhi); + Real dudz = (vel(i,j,k,0) - vel(i,j,k-1,0))*dzi; + Real dvdz = (vel(i,j,k,1) - vel(i,j,k-1,1))*dzi; + Real dwdz = (vel(i,j,k,2) - vel(i,j,k-1,2))*dzi; + fz(i,j,k,0) = dudx; + fz(i,j,k,1) = dvdx; + fz(i,j,k,2) = dwdx; + fz(i,j,k,3) = dudy; + fz(i,j,k,4) = dvdy; + fz(i,j,k,5) = dwdy; + fz(i,j,k,6) = dudz; + fz(i,j,k,7) = dvdz; + fz(i,j,k,8) = dwdz; + + } + } + } +} + } #endif diff --git a/Src/LinearSolvers/MLMG/AMReX_MLTensor_K.H b/Src/LinearSolvers/MLMG/AMReX_MLTensor_K.H index 4440f57e7a8..33457ec1ced 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLTensor_K.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLTensor_K.H @@ -5,6 +5,123 @@ #include #include +namespace amrex { + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mltensor_dy_on_xface (int i, int j, int k, int n, Array4 const& vel, Real dyi) noexcept +{ + return (vel(i,j+1,k,n)+vel(i-1,j+1,k,n)-vel(i,j-1,k,n)-vel(i-1,j-1,k,n))*(Real(0.25)*dyi); +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mltensor_dx_on_yface (int i, int j, int k, int n, Array4 const& vel, Real dxi) noexcept +{ + return (vel(i+1,j,k,n)+vel(i+1,j-1,k,n)-vel(i-1,j,k,n)-vel(i-1,j-1,k,n))*(Real(0.25)*dxi); +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mltensor_dy_on_xface (int i, int j, int k, int n, Array4 const& vel, Real dyi, + Array4 const& bvxlo, Array4 const& bvxhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept +{ + Real ddy; + if (i == dlo.x) { + if (bct(Orientation::xlo(),n) == 
AMREX_LO_DIRICHLET && bvxlo) { + if (j == dlo.y) { + ddy = (bvxlo(i-1,j ,k,n) * Real(-1.5) + + bvxlo(i-1,j+1,k,n) * Real(2.) + + bvxlo(i-1,j+2,k,n) * Real(-0.5)) * dyi; + } else if (j == dhi.y) { + ddy = -(bvxlo(i-1,j ,k,n) * Real(-1.5) + + bvxlo(i-1,j-1,k,n) * Real(2.) + + bvxlo(i-1,j-2,k,n) * Real(-0.5)) * dyi; + } else { + ddy = (bvxlo(i-1,j+1,k,n)-bvxlo(i-1,j-1,k,n))*(Real(0.5)*dyi); + } + } else if (bct(Orientation::xlo(),n) == AMREX_LO_NEUMANN) { + ddy = (vel(i,j+1,k,n)-vel(i,j-1,k,n))*(Real(0.5)*dyi); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddy = Real(0.); + } + } else if (i == dhi.x+1) { + if (bct(Orientation::xhi(),n) == AMREX_LO_DIRICHLET && bvxhi) { + if (j == dlo.y) { + ddy = (bvxhi(i,j ,k,n) * Real(-1.5) + + bvxhi(i,j+1,k,n) * Real(2.) + + bvxhi(i,j+2,k,n) * Real(-0.5)) * dyi; + } else if (j == dhi.y) { + ddy = -(bvxhi(i,j ,k,n) * Real(-1.5) + + bvxhi(i,j-1,k,n) * Real(2.) + + bvxhi(i,j-2,k,n) * Real(-0.5)) * dyi; + } else { + ddy = (bvxhi(i,j+1,k,n)-bvxhi(i,j-1,k,n))*(Real(0.5)*dyi); + } + } else if (bct(Orientation::xhi(),n) == AMREX_LO_NEUMANN) { + ddy = (vel(i-1,j+1,k,n)-vel(i-1,j-1,k,n))*(Real(0.5)*dyi); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddy = Real(0.); + } + } else { + ddy = mltensor_dy_on_xface(i,j,k,n,vel,dyi); + } + return ddy; +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mltensor_dx_on_yface (int i, int j, int k, int n, Array4 const& vel, Real dxi, + Array4 const& bvylo, Array4 const& bvyhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept +{ + Real ddx; + if (j == dlo.y) { + if (bct(Orientation::ylo(),n) == AMREX_LO_DIRICHLET && bvylo) { + if (i == dlo.x) { + ddx = (bvylo(i ,j-1,k,n) * Real(-1.5) + + bvylo(i+1,j-1,k,n) * Real(2.) + + bvylo(i+2,j-1,k,n) * Real(-0.5)) * dxi; + } else if (i == dhi.x) { + ddx = -(bvylo(i ,j-1,k,n) * Real(-1.5) + + bvylo(i-1,j-1,k,n) * Real(2.) + + bvylo(i-2,j-1,k,n) * Real(-0.5)) * dxi; + } else { + ddx = (bvylo(i+1,j-1,k,n)-bvylo(i-1,j-1,k,n))*(Real(0.5)*dxi); + } + } else if (bct(Orientation::ylo(),n) == AMREX_LO_NEUMANN) { + ddx = (vel(i+1,j,k,n)-vel(i-1,j,k,n))*(Real(0.5)*dxi); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddx = Real(0.); + } + } else if (j == dhi.y+1) { + if (bct(Orientation::yhi(),n) == AMREX_LO_DIRICHLET && bvyhi) { + if (i == dlo.x) { + ddx = (bvyhi(i ,j,k,n) * Real(-1.5) + + bvyhi(i+1,j,k,n) * Real(2.) + + bvyhi(i+2,j,k,n) * Real(-0.5)) * dxi; + } else if (i == dhi.x) { + ddx = -(bvyhi(i ,j,k,n) * Real(-1.5) + + bvyhi(i-1,j,k,n) * Real(2.) 
+ + bvyhi(i-2,j,k,n) * Real(-0.5)) * dxi; + } else { + ddx = (bvyhi(i+1,j,k,n)-bvyhi(i-1,j,k,n))*(Real(0.5)*dxi); + } + } else if (bct(Orientation::yhi(),n) == AMREX_LO_NEUMANN) { + ddx = (vel(i+1,j-1,k,n)-vel(i-1,j-1,k,n))*(Real(0.5)*dxi); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddx = Real(0.); + } + } else { + ddx = mltensor_dx_on_yface(i,j,k,n,vel,dxi); + } + return ddx; +} +} + #if (AMREX_SPACEDIM == 1) #include #elif (AMREX_SPACEDIM == 2) diff --git a/Src/LinearSolvers/OpenBC/AMReX_OpenBC.H b/Src/LinearSolvers/OpenBC/AMReX_OpenBC.H new file mode 100644 index 00000000000..00d589e34b4 --- /dev/null +++ b/Src/LinearSolvers/OpenBC/AMReX_OpenBC.H @@ -0,0 +1,141 @@ +#ifndef AMREX_OPENBC_H_ +#define AMREX_OPENBC_H_ +#include + +#include +#include + +namespace amrex +{ + +namespace openbc { + + static constexpr int M = 7; // highest order of moments + static constexpr int P = 3; + + struct Moments + { + typedef GpuArray array_type; + array_type mom; + Real x, y, z; + Orientation face; + }; + + struct MomTag + { + Array4 gp; + Box b2d; + Orientation face; + int offset; + }; + + std::ostream& operator<< (std::ostream& os, Moments const& mom); +} + +#if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP) +template<> +struct Gpu::SharedMemory +{ + AMREX_GPU_DEVICE openbc::Moments::array_type* dataPtr () noexcept { + AMREX_HIP_OR_CUDA(HIP_DYNAMIC_SHARED(openbc::Moments::array_type,amrex_openbc_momarray);, + extern __shared__ openbc::Moments::array_type amrex_openbc_momarray[];) + return amrex_openbc_momarray; + } +}; +#endif + +/** + * \brief Open Boundary Poisson Solver + * + * References: + * (1) The Solution of Poisson's Equation for Isolated Source + * Distributions, R. A. James, 1977, JCP 25, 71 + * (2) A Local Corrections Algorithm for Solving Poisson's Equation in Three + * Dimensions, P. McCorquodale, P. Colella, G. T. Balls, & S. B. 
Baden, + 2007, Communications in Applied Mathematics and Computational Science, + 2, 1, 57-81 + */ +class OpenBCSolver +{ +public: + OpenBCSolver (); + + OpenBCSolver (const Vector& a_geom, + const Vector& a_grids, + const Vector& a_dmap, + const LPInfo& a_info = LPInfo()); + + ~OpenBCSolver (); + + OpenBCSolver (const OpenBCSolver&) = delete; + OpenBCSolver (OpenBCSolver&&) = delete; + OpenBCSolver& operator= (const OpenBCSolver&) = delete; + OpenBCSolver& operator= (OpenBCSolver&&) = delete; + + void define (const Vector& a_geom, + const Vector& a_grids, + const Vector& a_dmap, + const LPInfo& a_info = LPInfo()); + + void setVerbose (int v) noexcept; + void setBottomVerbose (int v) noexcept; + + void useHypre (bool use_hypre) noexcept; + + Real solve (const Vector& a_sol, const Vector& a_rhs, + Real a_tol_rel, Real a_tol_abs); + +public: // public for cuda + + void compute_moments (Gpu::DeviceVector& moments); + void compute_potential (Gpu::DeviceVector const& moments); + void interpolate_potential (MultiFab& solg); + +private: + +#ifdef AMREX_USE_MPI + void bcast_moments (Gpu::DeviceVector& moments); +#endif + + int m_verbose = 0; + int m_bottom_verbose = 0; + Vector m_geom; + Vector m_grids; + Vector m_dmap; + LPInfo m_info; + std::unique_ptr m_poisson_1; + std::unique_ptr m_poisson_2; + std::unique_ptr m_mlmg_1; + std::unique_ptr m_mlmg_2; + BottomSolver m_bottom_solver_type = BottomSolver::bicgstab; + + int m_coarsen_ratio = 0; + Array m_dpdn; + Gpu::PinnedVector m_momtags_h; +#ifdef AMREX_USE_GPU + Gpu::DeviceVector m_momtags_d; + Gpu::PinnedVector m_ngpublocks_h; + Gpu::DeviceVector m_ngpublocks_d; + int m_nthreads_momtag; +#endif + + int m_nblocks_local = 0; + int m_nblocks = 0; +#ifdef AMREX_USE_MPI + Vector m_countvec; + Vector m_offset; +#endif + + IntVect m_ngrowdomain; + MultiFab m_crse_grown_faces_phi; + MultiFab m_phind; + BoxArray m_bag; + + BoxArray m_ba_all; + DistributionMapping m_dm_all; + Geometry m_geom_all; +}; + +} + +#endif diff --git a/Src/LinearSolvers/OpenBC/AMReX_OpenBC.cpp b/Src/LinearSolvers/OpenBC/AMReX_OpenBC.cpp new file mode 100644 index 00000000000..9e320d7a55f --- /dev/null +++ b/Src/LinearSolvers/OpenBC/AMReX_OpenBC.cpp @@ -0,0 +1,864 @@ +#include +#include +#include + +namespace amrex +{ + +OpenBCSolver::OpenBCSolver () {} + +OpenBCSolver::OpenBCSolver (const Vector& a_geom, + const Vector& a_grids, + const Vector& a_dmap, + const LPInfo& a_info) +{ + define(a_geom, a_grids, a_dmap, a_info); +} + +OpenBCSolver::~OpenBCSolver () {} + +void OpenBCSolver::define (const Vector& a_geom, + const Vector& a_grids, + const Vector& a_dmap, + const LPInfo& a_info) +{ + BL_PROFILE("OpenBCSolver::define()"); + + m_geom = a_geom; + m_grids = a_grids; + m_dmap = a_dmap; + m_info = a_info; + for (auto& grids : m_grids) { + grids.enclosedCells(); + } + + Box const domain0 = m_geom[0].Domain(); + m_coarsen_ratio = 8; + AMREX_ALWAYS_ASSERT(domain0.coarsenable(m_coarsen_ratio)); + int N1d = static_cast(std::round(std::pow(domain0.d_numPts(),1./3.))); + while (domain0.coarsenable(m_coarsen_ratio*2) + && 4*m_coarsen_ratio*m_coarsen_ratio <= N1d) { + m_coarsen_ratio *= 2; + } + + int ntags = 0; + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { + Box lo = amrex::coarsen(amrex::bdryLo(domain0, idim), m_coarsen_ratio); + Box hi = amrex::coarsen(amrex::bdryHi(domain0, idim), m_coarsen_ratio); + BoxList bl({lo,hi}); + IntVect chunk = lo.length(); + while (bl.size() < ParallelContext::NProcsSub()) { + IntVect chunk_prev = chunk; + for (int jdim = AMREX_SPACEDIM-1; jdim >=
0; --jdim) { + if (jdim != idim) { + int new_chunk_size = chunk[jdim] / 2; + if (bl.size() < ParallelContext::NProcsSub() + && new_chunk_size > 0) { + chunk[jdim] = new_chunk_size; + bl.maxSize(chunk); + } + } + } + if (chunk == chunk_prev) { + break; + } + } + int mgs = std::max(1, 256/m_coarsen_ratio); + bl.maxSize(mgs); + bl.refine(m_coarsen_ratio); + BoxArray ba2d(std::move(bl)); + DistributionMapping dm2d{ba2d}; + m_dpdn[idim].define(ba2d, dm2d, 1, 0); + ntags += m_dpdn[idim].local_size(); + } + + m_momtags_h.reserve(ntags); + int nblocks = 0; + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { + for (MFIter mfi(m_dpdn[idim]); mfi.isValid(); ++mfi) { + Box const& b2d = mfi.validbox(); + Orientation::Side side = (b2d.smallEnd(idim) == domain0.smallEnd(idim)) + ? Orientation::low : Orientation::high; + Orientation face(idim, side); + m_momtags_h.push_back({m_dpdn[idim].const_array(mfi), b2d, face, + nblocks}); + nblocks += static_cast(b2d.numPts()) + / (m_coarsen_ratio*m_coarsen_ratio); + } + } + m_nblocks_local = nblocks; + +#ifdef AMREX_USE_GPU + if (ntags > 0) { + m_momtags_d.resize(ntags); + Gpu::copyAsync(Gpu::hostToDevice, m_momtags_h.begin(), m_momtags_h.end(), m_momtags_d.begin()); + + m_nthreads_momtag = (m_coarsen_ratio == 8) ? 64 : 128; + int ntotgpublocks = 0; + m_ngpublocks_h.reserve(ntags+1); + for (auto const& tag : m_momtags_h) { + m_ngpublocks_h.push_back(ntotgpublocks); + Box cb2d = amrex::coarsen(tag.b2d, m_coarsen_ratio); + ntotgpublocks += static_cast(cb2d.numPts()); + } + m_ngpublocks_h.push_back(ntotgpublocks); + m_ngpublocks_d.resize(m_ngpublocks_h.size()); + Gpu::copyAsync(Gpu::hostToDevice, m_ngpublocks_h.begin(), m_ngpublocks_h.end(), + m_ngpublocks_d.begin()); + } +#endif + + auto const dx = m_geom[0].CellSize(); + Real dmax = amrex::max(std::sqrt(dx[0]*dx[0]+dx[1]*dx[1]), + std::sqrt(dx[0]*dx[0]+dx[2]*dx[2]), + std::sqrt(dx[1]*dx[1]+dx[2]*dx[2])); + m_ngrowdomain[0] = static_cast(std::ceil(dmax/dx[0])) * m_coarsen_ratio; + m_ngrowdomain[1] = static_cast(std::ceil(dmax/dx[1])) * m_coarsen_ratio; + m_ngrowdomain[2] = static_cast(std::ceil(dmax/dx[2])) * m_coarsen_ratio; + // This is the minimal size we need to embiggen the domain. + + Box const domain1 = amrex::grow(domain0, m_ngrowdomain); + BoxList bl_crse_grown_faces(IndexType::TheNodeType()); + for (OrientationIter oit; oit.isValid(); ++oit) { + Orientation face = oit(); + Box face_box = amrex::surroundingNodes(amrex::bdryNode(domain1,face)); + face_box.coarsen(m_coarsen_ratio); + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { + if (idim != face.coordDir()) { + face_box.grow(idim,openbc::P); + } + } + bl_crse_grown_faces.push_back(face_box); + } + + bl_crse_grown_faces.maxSize(16); // xxxxx make this a parameter? + BoxArray ba_crse_grown_faces(std::move(bl_crse_grown_faces)); + DistributionMapping dm_crse_grown_faces(ba_crse_grown_faces); + m_crse_grown_faces_phi.define(ba_crse_grown_faces, dm_crse_grown_faces, 1, 0); + + BoxList blg = amrex::boxDiff(domain1, domain0); + blg.maxSize(std::max(64,m_coarsen_ratio)); // xxxxx make this a parameter? 
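+    // blg covers the shell between the original domain (domain0) and the
+    // grown domain (domain1). These boxes become m_bag: they carry a zero
+    // right-hand side in the second solve, and m_phind (defined on their
+    // coarsened nodes below) holds the multipole potential that is later
+    // interpolated onto the faces of domain1 as Dirichlet boundary values.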
+ m_bag = BoxArray(std::move(blg)); + DistributionMapping dmg(m_bag); + m_phind.define(amrex::coarsen(amrex::convert(m_bag,IntVect(1)),m_coarsen_ratio), + dmg, 1, openbc::P); + + BoxList bl0 = m_grids[0].boxList(); + BoxList bl1 = m_bag.boxList(); + Vector p0 = m_dmap[0].ProcessorMap(); + Vector p1 = dmg.ProcessorMap(); + bl0.join(bl1); + p0.insert(p0.end(), p1.begin(), p1.end()); + IntVect const offset = -domain1.smallEnd(); + for (auto& b : bl0) { + b.shift(offset); + } + m_ba_all = BoxArray(std::move(bl0)); + m_dm_all = DistributionMapping(std::move(p0)); + + auto const problo = m_geom[0].ProbLo(); + auto const probhi = m_geom[0].ProbHi(); + std::array problo_all, probhi_all; + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { + problo_all[idim] = problo[idim] - m_ngrowdomain[idim]*dx[idim]; + probhi_all[idim] = probhi[idim] + m_ngrowdomain[idim]*dx[idim]; + } + m_geom_all = Geometry(amrex::shift(domain1,offset), + RealBox(problo_all,probhi_all), + m_geom[0].Coord(), m_geom[0].isPeriodic()); +} + +void OpenBCSolver::setVerbose (int v) noexcept +{ + m_verbose = v; +} + +void OpenBCSolver::setBottomVerbose (int v) noexcept +{ + m_bottom_verbose = v; +} + +void OpenBCSolver::useHypre (bool use_hypre) noexcept +{ + if (use_hypre) { + m_bottom_solver_type = BottomSolver::hypre; + m_info.setMaxCoarseningLevel(0); +#ifndef AMREX_USE_HYPRE + amrex::Abort("OpenBCSolver: Must enable Hypre support to use it."); +#endif + } +} + +Real OpenBCSolver::solve (const Vector& a_sol, + const Vector& a_rhs, + Real a_tol_rel, Real a_tol_abs) +{ + BL_PROFILE("OpenBCSolver::solve()"); + + auto solve_start_time = amrex::second(); + + int nlevels = m_geom.size(); + + BL_PROFILE_VAR("OpenBCSolver::MG1", blp_mg1); + + if (m_poisson_1 == nullptr) { + m_poisson_1 = std::make_unique(m_geom, m_grids, m_dmap, m_info); + m_poisson_1->setVerbose(m_verbose); + m_poisson_1->setMaxOrder(4); + m_poisson_1->setDomainBC({AMREX_D_DECL(LinOpBCType::Dirichlet, + LinOpBCType::Dirichlet, + LinOpBCType::Dirichlet)}, + {AMREX_D_DECL(LinOpBCType::Dirichlet, + LinOpBCType::Dirichlet, + LinOpBCType::Dirichlet)}); + for (int ilev = 0; ilev < nlevels; ++ilev) { + m_poisson_1->setLevelBC(ilev, nullptr); + } + + m_mlmg_1 = std::make_unique(*m_poisson_1); + m_mlmg_1->setVerbose(m_verbose); + m_mlmg_1->setBottomVerbose(m_bottom_verbose); + m_mlmg_1->setBottomSolver(m_bottom_solver_type); +#ifdef AMREX_USE_HYPRE + if (m_bottom_solver_type == BottomSolver::hypre) { + m_mlmg_1->setHypreInterface(Hypre::Interface::structed); + } +#endif + } + m_mlmg_1->solve(a_sol, a_rhs, a_tol_rel, a_tol_abs); + + BL_PROFILE_VAR_STOP(blp_mg1); + + Array dpdn_tmp; + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { + dpdn_tmp[idim].define(amrex::convert(m_grids[0], + IntVect::TheDimensionVector(idim)), + m_dmap[0], 1, 0); + } + m_poisson_1->get_dpdn_on_domain_faces(GetArrOfPtrs(dpdn_tmp), *a_sol[0]); + + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { + m_dpdn[idim].ParallelCopy(dpdn_tmp[idim]); + } + + { + Gpu::DeviceVector moments(m_nblocks_local); + compute_moments(moments); + compute_potential(moments); + } + + MultiFab rhsg(m_bag, m_phind.DistributionMap(), 1, a_rhs[0]->nGrowVect()); + rhsg.setVal(0._rt); + + MultiFab solg(m_bag, m_phind.DistributionMap(), 1, 1); + solg.setVal(0._rt); + interpolate_potential(solg); + + const int nboxes0 = m_grids[0].size(); + MultiFab sol_all(m_ba_all, m_dm_all, 1, solg.nGrowVect(), + MFInfo().SetAlloc(false)); + MultiFab rhs_all(m_ba_all, m_dm_all, 1, rhsg.nGrowVect(), + MFInfo().SetAlloc(false)); + + Box const 
domain1 = amrex::grow(m_geom[0].Domain(), m_ngrowdomain); + IntVect const offset = -domain1.smallEnd(); + for (MFIter mfi(sol_all); mfi.isValid(); ++mfi) { + const int index = mfi.index(); + FArrayBox solfab, rhsfab; + if (index < nboxes0) { + FArrayBox& sfab0 = (*a_sol[0])[index]; + if (sol_all.nGrowVect() == a_sol[0]->nGrowVect()) { + solfab = FArrayBox(sfab0, amrex::make_alias, 0, 1); + } else { + Box b = sfab0.box(); + b.grow(sol_all.nGrowVect()-a_sol[0]->nGrowVect()); + solfab.resize(b,1); + solfab.template setVal(0._rt); + } + rhsfab = FArrayBox((*a_rhs[0])[index], amrex::make_alias, 0, 1); + } else { + solfab = FArrayBox(solg[index-nboxes0], amrex::make_alias, 0, 1); + rhsfab = FArrayBox(rhsg[index-nboxes0], amrex::make_alias, 0, 1); + } + solfab.shift(offset); + rhsfab.shift(offset); + sol_all.setFab(index, std::move(solfab)); + rhs_all.setFab(index, std::move(rhsfab)); + } + + BL_PROFILE_VAR("OpenBCSolver::MG2", blp_mg2); + + if (m_poisson_2 == nullptr) { + Vector geom_all = m_geom; + Vector grids_all = m_grids; + Vector dmap_all = m_dmap; + geom_all[0] = m_geom_all; + grids_all[0] = m_ba_all; + dmap_all[0] = m_dm_all; + m_poisson_2 = std::make_unique(geom_all, grids_all, dmap_all, + m_info); + m_poisson_2->setVerbose(m_verbose); + m_poisson_2->setMaxOrder(4); + m_poisson_2->setDomainBC({AMREX_D_DECL(LinOpBCType::Dirichlet, + LinOpBCType::Dirichlet, + LinOpBCType::Dirichlet)}, + {AMREX_D_DECL(LinOpBCType::Dirichlet, + LinOpBCType::Dirichlet, + LinOpBCType::Dirichlet)}); + m_poisson_2->setLevelBC(0, &sol_all); + for (int ilev = 1; ilev < nlevels; ++ilev) { + m_poisson_2->setLevelBC(ilev, nullptr); + } + + m_mlmg_2 = std::make_unique(*m_poisson_2); + m_mlmg_2->setVerbose(m_verbose); + m_mlmg_2->setBottomVerbose(m_bottom_verbose); + m_mlmg_2->setBottomSolver(m_bottom_solver_type); +#ifdef AMREX_USE_HYPRE + if (m_bottom_solver_type == BottomSolver::hypre) { + m_mlmg_2->setHypreInterface(Hypre::Interface::structed); + } +#endif + } + Vector solv_all = a_sol; + Vector rhsv_all = a_rhs; + solv_all[0] = &sol_all; + rhsv_all[0] = &rhs_all; + Real err = m_mlmg_2->solve(solv_all, rhsv_all, a_tol_rel, a_tol_abs); + + BL_PROFILE_VAR_STOP(blp_mg2); + + if (sol_all.nGrowVect() != a_sol[0]->nGrowVect()) { +#ifdef AMREX_USE_OMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for (MFIter mfi(*a_sol[0], TilingIfNotGPU()); mfi.isValid(); ++mfi) + { + Box const& bx = mfi.tilebox(); + Array4 const& sall = sol_all.const_array(mfi.index()); + Array4 const& s = a_sol[0]->array(mfi); + AMREX_HOST_DEVICE_PARALLEL_FOR_3D(bx, i, j, k, + { + s(i,j,k) = sall(i,j,k); + }); + } + } + + auto solve_stop_time = amrex::second(); + if (m_verbose >= 1) { + amrex::Print() << "OpenBCSolver time = " + << solve_stop_time - solve_start_time << "\n"; + } + + return err; +} + +void OpenBCSolver::compute_moments (Gpu::DeviceVector& moments) +{ + BL_PROFILE("OpenBCSolver::comp_mom()"); + + auto const problo = m_geom[0].ProbLoArray(); + auto const probhi = m_geom[0].ProbHiArray(); + auto const dx = m_geom[0].CellSizeArray(); + +#ifdef AMREX_USE_GPU + if (m_momtags_h.size() > 0) + { + int crse_ratio = m_coarsen_ratio; + int ntags = m_momtags_h.size(); + openbc::Moments* pm = moments.data(); + openbc::MomTag const* ptag = m_momtags_d.data(); + int const* pnblks = m_ngpublocks_d.data(); + std::size_t shared_mem_bytes = m_nthreads_momtag * sizeof(openbc::Moments::array_type); + +#ifdef AMREX_USE_DPCPP + amrex::ignore_unused(problo,probhi,dx,crse_ratio,ntags,pm,ptag,pnblks, + shared_mem_bytes); + amrex::Abort("xxxx 
DPCPP todo: openbc compute_moments"); +#else + amrex::launch(m_ngpublocks_h.back(), m_nthreads_momtag, shared_mem_bytes, Gpu::gpuStream(), + [=] AMREX_GPU_DEVICE () noexcept + { + Gpu::SharedMemory gsm; + openbc::Moments::array_type* const shared = gsm.dataPtr(); + openbc::Moments::array_type& tmom = shared[threadIdx.x]; + for (int i = 0; i < (openbc::M+1)*(openbc::M+2)/2; ++i) { + tmom[i] = Real(0.); + } + + int tag_id = amrex::bisect(pnblks, 0, ntags, static_cast(blockIdx.x)); + int iblock = blockIdx.x - pnblks[tag_id]; // iblock'th gpublock on this box. + auto const& tag = ptag[tag_id]; + openbc::Moments& mom = pm[tag.offset+iblock]; + if (tag.face.coordDir() == 0) { + int const nby = tag.b2d.length(1) / crse_ratio; + int const kb = iblock / nby; + int const jb = iblock - kb*nby; + int const i = tag.b2d.smallEnd(0); + int const jlo = tag.b2d.smallEnd(1) + jb*crse_ratio; + int const klo = tag.b2d.smallEnd(2) + kb*crse_ratio; + Real const fac = dx[1]*dx[2]; + Real const xc = tag.face.isLow() ? problo[0] : probhi[0]; + for (int icell = threadIdx.x; icell < crse_ratio*crse_ratio; icell += blockDim.x) { + int k = icell/crse_ratio; + int j = icell - k*crse_ratio; + Real const yy = (j-crse_ratio/2+Real(0.5))*dx[1]; + Real const zz = (k-crse_ratio/2+Real(0.5))*dx[2]; + j += jlo; + k += klo; + Real const charge = tag.gp(i,j,k) * fac; + Real zpow = Real(1.); + int m = 0; + for (int q = 0; q <= openbc::M; ++q) { + Real ypow = Real(1.); + for (int p = 0; p <= openbc::M-q; ++p) { + tmom[m++] += charge*ypow*zpow; + ypow *= yy; + } + zpow *= zz; + } + } + if (threadIdx.x == 0) { + mom.x = xc; + mom.y = problo[1] + dx[1]*(jlo + crse_ratio/2); + mom.z = problo[2] + dx[2]*(klo + crse_ratio/2); + mom.face = tag.face; + } + } else if (tag.face.coordDir() == 1) { + int const nbx = tag.b2d.length(0) / crse_ratio; + int const kb = iblock / nbx; + int const ib = iblock - kb*nbx; + int const j = tag.b2d.smallEnd(1); + int const ilo = tag.b2d.smallEnd(0) + ib*crse_ratio; + int const klo = tag.b2d.smallEnd(2) + kb*crse_ratio; + Real const fac = dx[0]*dx[2]; + Real const yc = tag.face.isLow() ? problo[1] : probhi[1]; + for (int icell = threadIdx.x; icell < crse_ratio*crse_ratio; icell += blockDim.x) { + int k = icell/crse_ratio; + int i = icell - k*crse_ratio; + Real const xx = (i-crse_ratio/2+Real(0.5))*dx[0]; + Real const zz = (k-crse_ratio/2+Real(0.5))*dx[2]; + i += ilo; + k += klo; + Real const charge = tag.gp(i,j,k) * fac; + Real zpow = Real(1.); + int m = 0; + for (int q = 0; q <= openbc::M; ++q) { + Real xpow = Real(1.); + for (int p = 0; p <= openbc::M-q; ++p) { + tmom[m++] += charge*xpow*zpow; + xpow *= xx; + } + zpow *= zz; + } + } + if (threadIdx.x == 0) { + mom.x = problo[0] + dx[0]*(ilo + crse_ratio/2); + mom.y = yc; + mom.z = problo[2] + dx[2]*(klo + crse_ratio/2); + mom.face = tag.face; + } + } else { + int const nbx = tag.b2d.length(0) / crse_ratio; + int const jb = iblock / nbx; + int const ib = iblock - jb*nbx; + int const k = tag.b2d.smallEnd(2); + int const ilo = tag.b2d.smallEnd(0) + ib*crse_ratio; + int const jlo = tag.b2d.smallEnd(1) + jb*crse_ratio; + Real const fac = dx[0]*dx[1]; + Real const zc = tag.face.isLow() ? 
problo[2] : probhi[2]; + for (int icell = threadIdx.x; icell < crse_ratio*crse_ratio; icell += blockDim.x) { + int j = icell/crse_ratio; + int i = icell - j*crse_ratio; + Real const xx = (i-crse_ratio/2+Real(0.5))*dx[0]; + Real const yy = (j-crse_ratio/2+Real(0.5))*dx[1]; + i += ilo; + j += jlo; + Real const charge = tag.gp(i,j,k) * fac; + Real ypow = Real(1.); + int m = 0; + for (int q=0; q <= openbc::M; ++q) { + Real xpow = Real(1.); + for (int p = 0; p <= openbc::M-q; ++p) { + tmom[m++] += charge*xpow*ypow; + xpow *= xx; + } + ypow *= yy; + } + } + if (threadIdx.x == 0) { + mom.x = problo[0] + dx[0]*(ilo + crse_ratio/2); + mom.y = problo[1] + dx[1]*(jlo + crse_ratio/2); + mom.z = zc; + mom.face = tag.face; + } + } + openbc::scale_moments(tmom); + + __syncthreads(); + + if (threadIdx.x < (openbc::M+1)*(openbc::M+2)/2) { + mom.mom[threadIdx.x] = Real(0.); + for (unsigned int i = 0; i < blockDim.x; ++i) { + mom.mom[threadIdx.x] += shared[i][threadIdx.x]; + } + } + }); +#endif + } +#else + for (auto const& tag : m_momtags_h) { + if (tag.face.coordDir() == 0) { + int nby = tag.b2d.length(1) / m_coarsen_ratio; + int nbz = tag.b2d.length(2) / m_coarsen_ratio; + int i = tag.b2d.smallEnd(0); + int jlo = tag.b2d.smallEnd(1); + int klo = tag.b2d.smallEnd(2); + Real fac = dx[1]*dx[2]; + Real xc = tag.face.isLow() ? problo[0] : probhi[0]; + for (int kb = 0; kb < nbz; ++kb) { + for (int jb = 0; jb < nby; ++jb) { + openbc::Moments& mom = moments[tag.offset+jb+kb*nby]; + for (auto& m : mom.mom) { + m = 0._rt; + } + for (int kk = 0; kk < m_coarsen_ratio; ++kk) { + for (int jj = 0; jj < m_coarsen_ratio; ++jj) { + Real charge = tag.gp(i, jlo+jb*m_coarsen_ratio+jj, + klo+kb*m_coarsen_ratio+kk) * fac; + Real yy = (jj-m_coarsen_ratio/2+0.5_rt)*dx[1]; + Real zz = (kk-m_coarsen_ratio/2+0.5_rt)*dx[2]; + Real zpow = 1._rt; + int m = 0; + for (int q = 0; q <= openbc::M; ++q) { + Real ypow = 1._rt; + for (int p = 0; p <= openbc::M-q; ++p) { + mom.mom[m++] += charge*ypow*zpow; + ypow *= yy; + } + zpow *= zz; + } + }} + openbc::scale_moments(mom.mom); + // center of the block + mom.x = xc; + mom.y = problo[1] + dx[1]*(tag.b2d.smallEnd(1) + + jb*m_coarsen_ratio + + m_coarsen_ratio/2); + mom.z = problo[2] + dx[2]*(tag.b2d.smallEnd(2) + + kb*m_coarsen_ratio + + m_coarsen_ratio/2); + mom.face = tag.face; + }} + } else if (tag.face.coordDir() == 1) { + int nbx = tag.b2d.length(0) / m_coarsen_ratio; + int nbz = tag.b2d.length(2) / m_coarsen_ratio; + int j = tag.b2d.smallEnd(1); + int ilo = tag.b2d.smallEnd(0); + int klo = tag.b2d.smallEnd(2); + Real fac = dx[0]*dx[2]; + Real yc = tag.face.isLow() ? 
problo[1] : probhi[1]; + for (int kb = 0; kb < nbz; ++kb) { + for (int ib = 0; ib < nbx; ++ib) { + openbc::Moments& mom = moments[tag.offset+ib+kb*nbx]; + for (auto& m : mom.mom) { + m = 0._rt; + } + for (int kk = 0; kk < m_coarsen_ratio; ++kk) { + for (int ii = 0; ii < m_coarsen_ratio; ++ii) { + Real charge = tag.gp(ilo+ib*m_coarsen_ratio+ii, j, + klo+kb*m_coarsen_ratio+kk) * fac; + Real xx = (ii-m_coarsen_ratio/2+0.5_rt)*dx[0]; + Real zz = (kk-m_coarsen_ratio/2+0.5_rt)*dx[2]; + Real zpow = 1._rt; + int m = 0; + for (int q = 0; q <= openbc::M; ++q) { + Real xpow = 1._rt; + for (int p = 0; p <= openbc::M-q; ++p) { + mom.mom[m++] += charge*xpow*zpow; + xpow *= xx; + } + zpow *= zz; + } + }} + openbc::scale_moments(mom.mom); + mom.x = problo[0] + dx[0]*(tag.b2d.smallEnd(0) + + ib*m_coarsen_ratio + + m_coarsen_ratio/2); + mom.y = yc; + mom.z = problo[2] + dx[2]*(tag.b2d.smallEnd(2) + + kb*m_coarsen_ratio + + m_coarsen_ratio/2); + mom.face = tag.face; + }} + } else { + int nbx = tag.b2d.length(0) / m_coarsen_ratio; + int nby = tag.b2d.length(1) / m_coarsen_ratio; + int k = tag.b2d.smallEnd(2); + int ilo = tag.b2d.smallEnd(0); + int jlo = tag.b2d.smallEnd(1); + Real fac = dx[0]*dx[1]; + Real zc = tag.face.isLow() ? problo[2] : probhi[2]; + for (int jb = 0; jb < nby; ++jb) { + for (int ib = 0; ib < nbx; ++ib) { + openbc::Moments& mom = moments[tag.offset+ib+jb*nbx]; + for (auto& m : mom.mom) { + m = 0._rt; + } + for (int jj = 0; jj < m_coarsen_ratio; ++jj) { + for (int ii = 0; ii < m_coarsen_ratio; ++ii) { + Real charge = tag.gp(ilo+ib*m_coarsen_ratio+ii, + jlo+jb*m_coarsen_ratio+jj, k) * fac; + Real xx = (ii-m_coarsen_ratio/2+0.5_rt)*dx[0]; + Real yy = (jj-m_coarsen_ratio/2+0.5_rt)*dx[1]; + Real ypow = 1._rt; + int m = 0; + for (int q = 0; q <= openbc::M; ++q) { + Real xpow = 1._rt; + for (int p = 0; p <= openbc::M-q; ++p) { + mom.mom[m++] += charge*xpow*ypow; + xpow *= xx; + } + ypow *= yy; + } + }} + openbc::scale_moments(mom.mom); + mom.x = problo[0] + dx[0]*(tag.b2d.smallEnd(0) + + ib*m_coarsen_ratio + + m_coarsen_ratio/2); + mom.y = problo[1] + dx[1]*(tag.b2d.smallEnd(1) + + jb*m_coarsen_ratio + + m_coarsen_ratio/2); + mom.z = zc; + mom.face = tag.face; + }} + } + } +#endif + +#ifdef AMREX_USE_MPI + bcast_moments(moments); +#endif + m_nblocks = moments.size(); +} + +#ifdef AMREX_USE_MPI +void OpenBCSolver::bcast_moments (Gpu::DeviceVector& moments) +{ + if (ParallelContext::NProcsSub() > 1) + { + MPI_Comm comm = ParallelContext::CommunicatorSub(); + if (m_nblocks == 0) { + int count = moments.size(); + count *= static_cast(sizeof(openbc::Moments)); + m_countvec.resize(ParallelContext::NProcsSub()); + MPI_Allgather(&count, 1, MPI_INT, m_countvec.data(), 1, MPI_INT, comm); + + m_offset.resize(m_countvec.size(), 0); + Long count_tot = m_countvec[0]; + for (int i = 1, N = m_offset.size(); i < N; ++i) { + m_offset[i] = m_offset[i-1] + m_countvec[i-1]; + count_tot += m_countvec[i]; + } + + if (count_tot > static_cast(std::numeric_limits::max())) { + amrex::Abort("OpenBC: integer overflow. 
Let us know and we will fix this."); + } + + m_nblocks = count_tot/sizeof(openbc::Moments); + } + + Gpu::DeviceVector moments_all(m_nblocks); + +#ifdef AMREX_USE_GPU + Gpu::PinnedVector h_moments(moments.size()); + Gpu::PinnedVector h_moments_all(moments_all.size()); + Gpu::copyAsync(Gpu::deviceToHost, moments.begin(), moments.end(), + h_moments.begin()); + Gpu::streamSynchronize(); +#else + auto const& h_moments = moments; + auto& h_moments_all = moments_all; +#endif + + int count = m_nblocks_local*static_cast(sizeof(openbc::Moments)); + MPI_Allgatherv(h_moments.data(), count, MPI_CHAR, h_moments_all.data(), + m_countvec.data(), m_offset.data(), MPI_CHAR, comm); + +#ifdef AMREX_USE_GPU + Gpu::copyAsync(Gpu::hostToDevice, h_moments_all.begin(), h_moments_all.end(), + moments_all.begin()); + Gpu::streamSynchronize(); +#endif + + std::swap(moments, moments_all); + } +} +#endif + +void OpenBCSolver::compute_potential (Gpu::DeviceVector const& moments) +{ + BL_PROFILE("OpenBCSolver::comp_phi()"); + + auto const problo = m_geom[0].ProbLoArray(); + auto const dx = m_geom[0].CellSizeArray(); + + int crse_ratio = m_coarsen_ratio; + int nblocks = m_nblocks; + openbc::Moments const* pmom = moments.data(); + for (MFIter mfi(m_crse_grown_faces_phi); mfi.isValid(); ++mfi) { + Box const& b = mfi.validbox(); + Array4 const& phi_arr = m_crse_grown_faces_phi.array(mfi); +#if defined(AMREX_USE_GPU) + const auto lo = amrex::lbound(b); + const auto len = amrex::length(b); + const auto lenxy = len.x*len.y; + const auto lenx = len.x; +#ifdef AMREX_USE_DPCPP + amrex::ignore_unused(problo,dx,crse_ratio,nblocks,pmom,b,phi_arr,lo, + lenxy,lenx); + amrex::Abort("xxxxx DPCPP todo: openbc compute_potential"); +#else + amrex::launch(b.numPts(), AMREX_GPU_MAX_THREADS, Gpu::gpuStream(), + [=] AMREX_GPU_DEVICE () noexcept + { + int icell = blockIdx.x; + int k = icell / lenxy; + int j = (icell - k*lenxy) / lenx; + int i = (icell - k*lenxy) - j*lenx; + i += lo.x; + j += lo.y; + k += lo.z; + Real xb = problo[0] + i*crse_ratio*dx[0]; + Real yb = problo[1] + j*crse_ratio*dx[1]; + Real zb = problo[2] + k*crse_ratio*dx[2]; + Real phi = Real(0.); + for (int iblock = threadIdx.x; iblock < nblocks; iblock += blockDim.x) { + phi += openbc::block_potential(pmom[iblock], xb, yb, zb); + } + Real phitot = Gpu::blockReduceSum(phi); + if (threadIdx.x == 0) { + phi_arr(i,j,k) = phitot; + } + }); +#endif +#else + amrex::LoopOnCpu(b, [&] (int i, int j, int k) noexcept + { + Real xb = problo[0] + i*crse_ratio*dx[0]; + Real yb = problo[1] + j*crse_ratio*dx[1]; + Real zb = problo[2] + k*crse_ratio*dx[2]; + Real phi = 0._rt; + for (int iblock = 0; iblock < nblocks; ++iblock) { + phi += openbc::block_potential(pmom[iblock], xb, yb, zb); + } + phi_arr(i,j,k) = phi; + }); +#endif + } + + m_phind.ParallelCopy(m_crse_grown_faces_phi, 0, 0, 1, IntVect(0), + m_phind.nGrowVect()); +} + +void OpenBCSolver::interpolate_potential (MultiFab& solg) +{ + BL_PROFILE("OpenBCSolver::interp_phi"); + + Box const domain1 = amrex::grow(m_geom[0].Domain(), m_ngrowdomain); + int crse_ratio = m_coarsen_ratio; + + for (MFIter mfi(solg); mfi.isValid(); ++mfi) { + Box const& vbx = mfi.validbox(); + for (OrientationIter oit; oit.isValid(); ++oit) { + Orientation face = oit(); + if (vbx[face] == domain1[face]) { + Array4 const& solg_arr = solg.array(mfi); + Array4 const& phi_arr = m_phind.const_array(mfi); + Box const& b2d = amrex::bdryNode(vbx, face); + int offset = face.isLow() ? 
-1 : 0; + if (face.coordDir() == 0) { + Box b = amrex::coarsen(b2d,IntVect(crse_ratio,crse_ratio,1)); + b.grow(1,openbc::P).surroundingNodes(1); + FArrayBox tmpfab(b,1,The_Async_Arena()); + Array4 const& tmp = tmpfab.array(); + Array4 const& ctmp = tmpfab.const_array(); + amrex::ParallelFor(b, + [=] AMREX_GPU_DEVICE (int ic, int jc, int k) noexcept + { + tmp(ic,jc,k) = openbc::interpccz(ic,jc,k,phi_arr,crse_ratio); + }); + b = amrex::coarsen(b2d,IntVect(crse_ratio,1,1)); + amrex::ParallelFor(b, + [=] AMREX_GPU_DEVICE (int ic, int j, int k) noexcept + { + int i = ic*crse_ratio+offset; + solg_arr(i,j,k) = openbc::interpccy(ic,j,k,ctmp,crse_ratio); + }); + } else if (face.coordDir() == 1) { + Box b = amrex::coarsen(b2d,IntVect(crse_ratio,crse_ratio,1)); + b.grow(0,openbc::P).surroundingNodes(0); + FArrayBox tmpfab(b,1,The_Async_Arena()); + Array4 const& tmp = tmpfab.array(); + Array4 const& ctmp = tmpfab.const_array(); + amrex::ParallelFor(b, + [=] AMREX_GPU_DEVICE (int ic, int jc, int k) noexcept + { + tmp(ic,jc,k) = openbc::interpccz(ic,jc,k,phi_arr,crse_ratio); + }); + b = amrex::coarsen(b2d,IntVect(1,crse_ratio,1)); + amrex::ParallelFor(b, + [=] AMREX_GPU_DEVICE (int i, int jc, int k) noexcept + { + int j = jc*crse_ratio+offset; + solg_arr(i,j,k) = openbc::interpccx(i,jc,k,ctmp,crse_ratio); + }); + } else { + Box b = amrex::coarsen(b2d,IntVect(crse_ratio,1,crse_ratio)); + b.grow(0,openbc::P).surroundingNodes(0); + FArrayBox tmpfab(b,1,The_Async_Arena()); + Array4 const& tmp = tmpfab.array(); + Array4 const& ctmp = tmpfab.const_array(); + amrex::ParallelFor(b, + [=] AMREX_GPU_DEVICE (int ic, int j, int kc) noexcept + { + tmp(ic,j,kc) = openbc::interpccy(ic,j,kc,phi_arr,crse_ratio); + }); + b = amrex::coarsen(b2d,IntVect(1,1,crse_ratio)); + amrex::ParallelFor(b, + [=] AMREX_GPU_DEVICE (int i, int j, int kc) noexcept + { + int k = kc*crse_ratio+offset; + solg_arr(i,j,k) = openbc::interpccx(i,j,kc,ctmp,crse_ratio); + }); + } + } + } + } +} + +namespace openbc { +std::ostream& operator<< (std::ostream& os, Moments const& mom) +{ + os << "Face " << mom.face << ", x = " << mom.x << ", y = " << mom.y + << ", z = " << mom.z << "\n" + << " " << mom.mom[0] << "\n" + << " " << mom.mom[1] << ", " << mom.mom[8] << "\n" + << " " << mom.mom[2] << ", " << mom.mom[9] << ", " << mom.mom[15] << "\n" + << " " << mom.mom[3] << ", " << mom.mom[10] << ", " << mom.mom[16] + << ", " << mom.mom[21] << "\n" + << " " << mom.mom[4] << ", " << mom.mom[11] << ", " << mom.mom[17] + << ", " << mom.mom[22] << ", " << mom.mom[26] << "\n" + << " " << mom.mom[5] << ", " << mom.mom[12] << ", " << mom.mom[18] + << ", " << mom.mom[23] << ", " << mom.mom[27] << ", " << mom.mom[30] << "\n" + << " " << mom.mom[6] << ", " << mom.mom[13] << ", " << mom.mom[19] + << ", " << mom.mom[24] << ", " << mom.mom[28] << ", " << mom.mom[31] + << ", " << mom.mom[33] << "\n" + << " " << mom.mom[7] << ", " << mom.mom[14] << ", " << mom.mom[20] + << ", " << mom.mom[25] << ", " << mom.mom[29] << ", " << mom.mom[32] + << ", " << mom.mom[34] << ", " << mom.mom[35] << "\n"; + return os; +} +} + +} diff --git a/Src/LinearSolvers/OpenBC/AMReX_OpenBC_K.H b/Src/LinearSolvers/OpenBC/AMReX_OpenBC_K.H new file mode 100644 index 00000000000..7a6b2643b68 --- /dev/null +++ b/Src/LinearSolvers/OpenBC/AMReX_OpenBC_K.H @@ -0,0 +1,166 @@ +#ifndef AMREX_OPENBC_K_H_ +#define AMREX_OPENBC_K_H_ + +#include +#include + +namespace amrex { namespace openbc { + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +void scale_moments (openbc::Moments::array_type& mom) +{ // p!*q! 
in the order of 0!*0!, 1!*0!, ..., 7!*0!, 0!*1!, 1!*1!, 2!*1!, ..., 6!*1!, 0!*2!, ..., 0!*7!. + mom[ 2] *= Real(0.5); + mom[ 3] *= Real(1./6.); + mom[ 4] *= Real(1./24.); + mom[ 5] *= Real(1./120.); + mom[ 6] *= Real(1./720.); + mom[ 7] *= Real(1./5040.); + mom[10] *= Real(0.5); + mom[11] *= Real(1./6.); + mom[12] *= Real(1./24.); + mom[13] *= Real(1./120.); + mom[14] *= Real(1./720.); + mom[15] *= Real(0.5); + mom[16] *= Real(0.5); + mom[17] *= Real(0.25); + mom[18] *= Real(1./12.); + mom[19] *= Real(1./48.); + mom[20] *= Real(1./240.); + mom[21] *= Real(1./6.); + mom[22] *= Real(1./6.); + mom[23] *= Real(1./12.); + mom[24] *= Real(1./36.); + mom[25] *= Real(1./144.); + mom[26] *= Real(1./24.); + mom[27] *= Real(1./24.); + mom[28] *= Real(1./48.); + mom[29] *= Real(1./144.); + mom[30] *= Real(1./120.); + mom[31] *= Real(1./120.); + mom[32] *= Real(1./240.); + mom[33] *= Real(1./720.); + mom[34] *= Real(1./720.); + mom[35] *= Real(1./5040.); +} + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +Real block_potential (openbc::Moments const& mom, Real xb, Real yb, Real zb) +{ + constexpr Real oneover4pi = Real(1.)/Real(4.*3.1415926535897932); + + xb -= mom.x; + yb -= mom.y; + zb -= mom.z; + Real ri = Real(1.)/std::sqrt(xb*xb+yb*yb+zb*zb); + Real ri2 = ri*ri; + Real ri3 = ri2*ri; + Real ri4 = ri3*ri; + Real xr, yr; + if (mom.face.coordDir() == 0) { + xr = yb*ri; + yr = zb*ri; + } else if (mom.face.coordDir() == 1) { + xr = xb*ri; + yr = zb*ri; + } else { + xr = xb*ri; + yr = yb*ri; + } + Real xr2 = xr *xr; + Real xr4 = xr2*xr2; + Real xr6 = xr4*xr2; + Real yr2 = yr *yr; + Real yr4 = yr2*yr2; + Real yr6 = yr4*yr2; + Real phi = ri * mom.mom[0] + + ri2*(xr*mom.mom[1] + yr*mom.mom[8]) + + ri3*((Real(3.) * xr2 - Real(1.)) * mom.mom[2] + + (Real(3.) * xr * yr ) * mom.mom[9] + + (Real(3.) * yr2 - Real(1.)) * mom.mom[15]) + + ri4 * (xr * (Real(15.) * xr2 - Real(9.)) * mom.mom[3] + + yr * (Real(15.) * xr2 - Real(3.)) * mom.mom[10] + + xr * (Real(15.) * yr2 - Real(3.)) * mom.mom[16] + + yr * (Real(15.) * yr2 - Real(9.)) * mom.mom[21]) + + ri4*ri * ((Real(105.) * xr4 - Real(90.) * xr2 + Real(9.)) * mom.mom[4] + + (xr * yr * (Real(105.) * xr2 - Real(45.))) * mom.mom[11] + + (Real(105.) * xr2 * yr2 - Real(15.) * xr2 - Real(15.) * yr2 + Real(3.)) * mom.mom[17] + + (xr * yr * (Real(105.) * yr2 - Real(45.))) * mom.mom[22] + + (Real(105.) * yr4 - Real(90.) * yr2 + Real(9.)) * mom.mom[26]) + + ri4*ri2 * (xr * (Real(945.)*xr4 - Real(1050.)*xr2 + Real(225.)) * mom.mom[5] + + yr * (Real(945.)*xr4 - Real(630.)*xr2 + Real(45.)) * mom.mom[12] + + xr * (Real(945.)*xr2*yr2 - Real(105.)*xr2 - Real(315.)*yr2 + Real(45.)) * mom.mom[18] + + yr * (Real(945.)*xr2*yr2 - Real(315.)*xr2 - Real(105.)*yr2 + Real(45.)) * mom.mom[23] + + xr * (Real(945.)*yr4 - Real(630.)*yr2 + Real(45.)) * mom.mom[27] + + yr * (Real(945.)*yr4 - Real(1050.)*yr2 + Real(225.)) * mom.mom[30]) + + ri4*ri3 * (Real(45.) * (Real(231.)*xr6 - Real(315.)*xr4 + Real(105.)*xr2 - Real(5.)) * mom.mom[6] + + Real(315.)*xr*yr * (Real(33.)*xr4 - Real(30.)*xr2 + Real(5.)) * mom.mom[13] + + Real(45.) * (Real(231.)*xr4*yr2 - Real(21.)*xr4 - Real(126.)*xr2*yr2 + Real(14.)*xr2 + Real(7.)*yr2 - Real(1.)) * mom.mom[19] + + Real(945.)*xr*yr * (Real(11.)*xr2*yr2 - Real(3.)*xr2 - Real(3.)*yr2 + Real(1.)) * mom.mom[24] + + Real(45.) * (Real(231.)*xr2*yr4 - Real(126.)*xr2*yr2 + Real(7.)*xr2 - Real(21.)*yr4 + Real(14.)*yr2 - Real(1.)) * mom.mom[28] + + Real(315.)*xr*yr * (Real(33.)*yr4 - Real(30.)*yr2 + Real(5.)) * mom.mom[31] + + Real(45.) 
* (Real(231.)*yr6 - Real(315.)*yr4 + Real(105.)*yr2 - Real(5.)) * mom.mom[33]) + + ri4*ri4*(Real(315.)*xr*(Real(429.)*xr6 - Real(693.)*xr4 + Real(315.)*xr2 - Real(35.)) * mom.mom[7] + + Real(315.)*yr*(Real(429.)*xr6 - Real(495.)*xr4 + Real(135.)*xr2 - Real(5.)) * mom.mom[14] + + Real(315.)*xr*(Real(429.)*xr4*yr2 - Real(33.)*xr4 - Real(330.)*xr2*yr2 + Real(30.)*xr2 + Real(45.)*yr2 - Real(5.)) * mom.mom[20] + + Real(945.)*yr*(Real(143.)*xr4*yr2 - Real(33.)*xr4 - Real(66.)*xr2*yr2 + Real(18.)*xr2 + Real(3.)*yr2 - Real(1.)) * mom.mom[25] + + Real(945.)*xr*(Real(143.)*xr2*yr4 - Real(66.)*xr2*yr2 + Real(3.)*xr2 - Real(33.)*yr4 + Real(18.)*yr2 - Real(1.)) * mom.mom[29] + + Real(315.)*yr*(Real(429.)*xr2*yr4 - Real(330.)*xr2*yr2 + Real(45.)*xr2 - Real(33.)*yr4 + Real(30.)*yr2 - Real(5.)) * mom.mom[32] + + Real(315.)*xr*(Real(429.)*yr6 - Real(495.)*yr4 + Real(135.)*yr2 - Real(5.)) * mom.mom[34] + + Real(315.)*yr*(Real(429.)*yr6 - Real(693.)*yr4 + Real(315.)*yr2 - Real(35.)) * mom.mom[35]); + return phi*(-oneover4pi); +} + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +void interp_coef (int i, int ii, Real* AMREX_RESTRICT c, int crse_ratio) +{ + static_assert(openbc::P == 3, "openbc::P is assumed to be 3 here"); + Real xint = (ii-i*crse_ratio + Real(0.5))/static_cast(crse_ratio); + constexpr Real x[] = {-3._rt, -2._rt, -1._rt, 0._rt, 1._rt, 2._rt, 3._rt, 4._rt}; + poly_interp_coeff<8>(xint, x, c); +} + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +Real interpccx (int ii, int j, int k, Array4 const& phi, int crse_ratio) +{ + int i = amrex::coarsen(ii,crse_ratio); + Real c[8]; + interp_coef(i,ii,c,crse_ratio); + + Real p = Real(0.); + for (int n = 0; n < 8; ++n) { + p += c[n] * phi(i-3+n,j,k); + } + return p; +} + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +Real interpccy (int i, int jj, int k, Array4 const& phi, int crse_ratio) +{ + int j = amrex::coarsen(jj,crse_ratio); + Real c[8]; + interp_coef(j,jj,c,crse_ratio); + + Real p = Real(0.); + for (int n = 0; n < 8; ++n) { + p += c[n] * phi(i,j-3+n,k); + } + return p; +} + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +Real interpccz (int i, int j, int kk, Array4 const& phi, int crse_ratio) +{ + int k = amrex::coarsen(kk,crse_ratio); + Real c[8]; + interp_coef(k,kk,c,crse_ratio); + + Real p = Real(0.); + for (int n = 0; n < 8; ++n) { + p += c[n] * phi(i,j,k-3+n); + } + return p; +} + +}} + +#endif diff --git a/Src/LinearSolvers/OpenBC/Make.package b/Src/LinearSolvers/OpenBC/Make.package new file mode 100644 index 00000000000..5fc39f69371 --- /dev/null +++ b/Src/LinearSolvers/OpenBC/Make.package @@ -0,0 +1,6 @@ + +CEXE_headers += AMReX_OpenBC.H AMReX_OpenBC_K.H +CEXE_sources += AMReX_OpenBC.cpp + +VPATH_LOCATIONS += $(AMREX_HOME)/Src/LinearSolvers/OpenBC +INCLUDE_LOCATIONS += $(AMREX_HOME)/Src/LinearSolvers/OpenBC diff --git a/Src/Particle/AMReX_DenseBins.H b/Src/Particle/AMReX_DenseBins.H index 93c9415ad25..0f1e94bb176 100644 --- a/Src/Particle/AMReX_DenseBins.H +++ b/Src/Particle/AMReX_DenseBins.H @@ -200,6 +200,7 @@ public: m_bins.resize(nitems); m_perm.resize(nitems); + m_local_offsets.resize(nitems); m_counts.resize(0); m_counts.resize(nbins+1, 0); @@ -209,21 +210,21 @@ public: index_type* pbins = m_bins.dataPtr(); index_type* pcount = m_counts.dataPtr(); + index_type* plocal_offsets = m_local_offsets.dataPtr(); amrex::ParallelFor(nitems, [=] AMREX_GPU_DEVICE (int i) noexcept { pbins[i] = f(v[i]); - Gpu::Atomic::AddNoRet(&pcount[pbins[i]], index_type{ 1 }); + index_type off = Gpu::Atomic::Add(&pcount[pbins[i]], index_type{ 1 }); + plocal_offsets[i] = off; }); 
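+        // First pass (above): each item atomically increments its bin's count
+        // and keeps the pre-increment value returned by Gpu::Atomic::Add as
+        // its offset within that bin. The exclusive scan below turns the
+        // counts into global bin offsets, so each item's permutation slot is
+        // simply poffsets[pbins[i]] + plocal_offsets[i]. This removes the old
+        // second atomic pass (Gpu::Atomic::Inc) and the copy of m_offsets
+        // back into m_counts.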
Gpu::exclusive_scan(m_counts.begin(), m_counts.end(), m_offsets.begin()); - Gpu::copyAsync(Gpu::deviceToDevice, m_offsets.begin(), m_offsets.end(), m_counts.begin()); - index_type* pperm = m_perm.dataPtr(); - constexpr index_type max_index = std::numeric_limits::max(); + index_type* poffsets = m_offsets.dataPtr(); amrex::ParallelFor(nitems, [=] AMREX_GPU_DEVICE (int i) noexcept { - index_type index = Gpu::Atomic::Inc(&pcount[pbins[i]], max_index); + index_type index = poffsets[pbins[i]] + plocal_offsets[i]; pperm[index] = i; }); @@ -503,6 +504,7 @@ private: Gpu::DeviceVector m_bins; Gpu::DeviceVector m_counts; + Gpu::DeviceVector m_local_offsets; Gpu::DeviceVector m_offsets; Gpu::DeviceVector m_perm; }; diff --git a/Src/Particle/AMReX_NeighborParticles.H b/Src/Particle/AMReX_NeighborParticles.H index 36d2c5351d7..344d39f778e 100644 --- a/Src/Particle/AMReX_NeighborParticles.H +++ b/Src/Particle/AMReX_NeighborParticles.H @@ -348,6 +348,11 @@ protected: /// void BuildMasks (); + /// + /// Are the masks computed by the above function still valid? + /// + bool areMasksValid (); + void GetNeighborCommTags (); void GetCommTagsBox (Vector& tags, const int lev, const Box& in_box); diff --git a/Src/Particle/AMReX_NeighborParticlesCPUImpl.H b/Src/Particle/AMReX_NeighborParticlesCPUImpl.H index d5fb9fc40ee..4d5ecb4fcc8 100644 --- a/Src/Particle/AMReX_NeighborParticlesCPUImpl.H +++ b/Src/Particle/AMReX_NeighborParticlesCPUImpl.H @@ -7,8 +7,10 @@ void NeighborParticleContainer ::fillNeighborsCPU () { BL_PROFILE("NeighborParticleContainer::fillNeighborsCPU"); - BuildMasks(); - GetNeighborCommTags(); + if (!areMasksValid()) { + BuildMasks(); + GetNeighborCommTags(); + } cacheNeighborInfo(); updateNeighborsCPU(false); } diff --git a/Src/Particle/AMReX_NeighborParticlesGPUImpl.H b/Src/Particle/AMReX_NeighborParticlesGPUImpl.H index 6e112318757..81bef1302e2 100644 --- a/Src/Particle/AMReX_NeighborParticlesGPUImpl.H +++ b/Src/Particle/AMReX_NeighborParticlesGPUImpl.H @@ -121,7 +121,7 @@ buildNeighborCopyOp (bool use_boundary_neighbor) { BL_PROFILE("NeighborParticleContainer::buildNeighborCopyOp()"); - AMREX_ASSERT(hasNeighbors() == false); + AMREX_ASSERT(!hasNeighbors() || use_boundary_neighbor); const int lev = 0; const auto& geom = this->Geom(lev); diff --git a/Src/Particle/AMReX_NeighborParticlesI.H b/Src/Particle/AMReX_NeighborParticlesI.H index a07cfab92c4..202f41f87f3 100644 --- a/Src/Particle/AMReX_NeighborParticlesI.H +++ b/Src/Particle/AMReX_NeighborParticlesI.H @@ -119,6 +119,30 @@ NeighborParticleContainer this->Redistribute(); } +template +bool +NeighborParticleContainer +::areMasksValid () { + + BL_PROFILE("NeighborParticleContainer::areMasksValid"); + + resizeContainers(this->numLevels()); + + for (int lev = 0; lev < this->numLevels(); ++lev) + { + BoxArray ba = this->ParticleBoxArray(lev); + const DistributionMapping& dmap = this->ParticleDistributionMap(lev); + + if (mask_ptr[lev] == nullptr || + ! BoxArray::SameRefs(mask_ptr[lev]->boxArray(), ba) || + ! DistributionMapping::SameRefs(mask_ptr[lev]->DistributionMap(), dmap)) + { + return false; + } + } + return true; +} + template void NeighborParticleContainer @@ -136,30 +160,25 @@ NeighborParticleContainer BoxArray ba = this->ParticleBoxArray(lev); const DistributionMapping& dmap = this->ParticleDistributionMap(lev); - if (mask_ptr[lev] == nullptr || - ! BoxArray::SameRefs(mask_ptr[lev]->boxArray(), ba) || - ! 
DistributionMapping::SameRefs(mask_ptr[lev]->DistributionMap(), dmap)) - { - const Geometry& geom = this->Geom(lev); + const Geometry& geom = this->Geom(lev); - mask_ptr[lev] = std::make_unique(ba, dmap, int(num_mask_comps), m_num_neighbor_cells); - mask_ptr[lev]->setVal(-1, m_num_neighbor_cells); + mask_ptr[lev] = std::make_unique(ba, dmap, int(num_mask_comps), m_num_neighbor_cells); + mask_ptr[lev]->setVal(-1, m_num_neighbor_cells); #ifdef AMREX_USE_OMP #pragma omp parallel #endif - for (MFIter mfi(*mask_ptr[lev],this->do_tiling ? this->tile_size : IntVect::TheZeroVector()); - mfi.isValid(); ++mfi) { - const Box& box = mfi.tilebox(); - const int grid_id = mfi.index(); - const int tile_id = mfi.LocalTileIndex(); - (*mask_ptr[lev])[mfi].template setVal(grid_id, box, MaskComps::grid, 1); - (*mask_ptr[lev])[mfi].template setVal(tile_id, box, MaskComps::tile, 1); - (*mask_ptr[lev])[mfi].template setVal(lev , box, MaskComps::level, 1); - } - - mask_ptr[lev]->FillBoundary(geom.periodicity()); + for (MFIter mfi(*mask_ptr[lev],this->do_tiling ? this->tile_size : IntVect::TheZeroVector()); + mfi.isValid(); ++mfi) { + const Box& box = mfi.tilebox(); + const int grid_id = mfi.index(); + const int tile_id = mfi.LocalTileIndex(); + (*mask_ptr[lev])[mfi].template setVal(grid_id, box, MaskComps::grid, 1); + (*mask_ptr[lev])[mfi].template setVal(tile_id, box, MaskComps::tile, 1); + (*mask_ptr[lev])[mfi].template setVal(lev , box, MaskComps::level, 1); } + + mask_ptr[lev]->FillBoundary(geom.periodicity()); } } @@ -794,9 +813,21 @@ NeighborParticleContainer:: selectActualNeighbors (CheckPair&& check_pair, int num_cells) { BL_PROFILE("NeighborParticleContainer::selectActualNeighbors"); + const auto& geom_fine = this->Geom(0); + const auto& ba_fine = this->ParticleBoxArray(0); + if (ba_fine.size() == 1 && !geom_fine.isAnyPeriodic()) { + return; + } for (int lev = 0; lev < this->numLevels(); ++lev) { + // clear previous neighbor particle ids + if (!m_boundary_particle_ids.empty()) { + for (auto& keyval: m_boundary_particle_ids[lev]) { + keyval.second.clear(); + } + } + for (MyParIter pti(*this, lev); pti.isValid(); ++pti) { PairIndex index(pti.index(), pti.LocalTileIndex()); @@ -838,8 +869,8 @@ selectActualNeighbors (CheckPair&& check_pair, int num_cells) auto pperm = bins.permutationPtr(); auto poffset = bins.offsetsPtr(); - unsigned int np_boundary = 0; - unsigned int* p_np_boundary = &np_boundary; + Gpu::Buffer np_boundary({0}); + unsigned int* p_np_boundary = np_boundary.data(); constexpr unsigned int max_unsigned_int = std::numeric_limits::max(); AMREX_FOR_1D ( np_real, i, @@ -880,9 +911,9 @@ selectActualNeighbors (CheckPair&& check_pair, int num_cells) } } });// end amrex_for_1d - Gpu::streamSynchronize(); - m_boundary_particle_ids[lev][index].resize(np_boundary); + unsigned int* p_np_boundary_h = np_boundary.copyToHost(); + m_boundary_particle_ids[lev][index].resize(*p_np_boundary_h); }// end mypariter }// end lev diff --git a/Src/Particle/AMReX_ParticleContainer.H b/Src/Particle/AMReX_ParticleContainer.H index c164e7214d3..d604a36c896 100644 --- a/Src/Particle/AMReX_ParticleContainer.H +++ b/Src/Particle/AMReX_ParticleContainer.H @@ -531,9 +531,9 @@ public: */ bool OK (int lev_min = 0, int lev_max = -1, int nGrow = 0) const; - void ByteSpread () const; + std::array ByteSpread () const; - void PrintCapacity () const; + std::array PrintCapacity () const; void ShrinkToFit (); diff --git a/Src/Particle/AMReX_ParticleContainerI.H b/Src/Particle/AMReX_ParticleContainerI.H index c7ee56c3f68..f257ff17ddd 
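
In selectActualNeighbors above, the boundary-particle counter moves off the host stack (which device code cannot legally write) into device memory owned by a Gpu::Buffer (presumably Gpu::Buffer<unsigned int>, matching the unsigned int* it hands out), and the host reads the count back with copyToHost() once the kernel has finished. A host-only sketch of the pattern, with std::atomic standing in for device memory and atomic intrinsics:

```cpp
#include <atomic>
#include <cstdio>
#include <vector>

int main ()
{
    // 1 marks a particle that has an actual neighbor on another grid.
    std::vector<int> is_boundary = {0, 1, 0, 1, 1};

    // Stand-in for the Gpu::Buffer: storage the parallel workers may
    // legally update. Each worker reserves an output slot with an atomic
    // add, like Gpu::Atomic in the AMREX_FOR_1D kernel above.
    std::atomic<unsigned int> np_boundary{0};
    std::vector<unsigned int> ids(is_boundary.size());

    for (std::size_t i = 0; i < is_boundary.size(); ++i) {
        if (is_boundary[i]) {
            unsigned int slot = np_boundary.fetch_add(1);
            ids[slot] = static_cast<unsigned int>(i);
        }
    }

    // Only after the parallel region is the count valid on the host
    // (copyToHost in the real code, replacing the bare streamSynchronize).
    std::printf("found %u boundary particles\n", np_boundary.load());
}
```
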
100644 --- a/Src/Particle/AMReX_ParticleContainerI.H +++ b/Src/Particle/AMReX_ParticleContainerI.H @@ -241,10 +241,11 @@ ParticleContainer_impl const auto& geom = Geom(0); const auto plo = geom.ProbLoArray(); const auto phi = geom.ProbHiArray(); - const auto rhi = geom.RoundoffHiArray(); + const auto rlo = geom.ProbLoArrayInParticleReal(); + const auto rhi = geom.ProbHiArrayInParticleReal(); const auto is_per = geom.isPeriodicArray(); - return enforcePeriodic(p, plo, phi, rhi, is_per); + return enforcePeriodic(p, plo, phi, rlo, rhi, is_per); } template ::locatePa if (! outside) { - if (Geom(0).outsideRoundoffDomain(AMREX_D_DECL(Real(p.pos(0)), Real(p.pos(1)), Real(p.pos(2))))) + if (Geom(0).outsideRoundoffDomain(AMREX_D_DECL(p.pos(0), p.pos(1), p.pos(2)))) { - RealBox roundoff_domain = Geom(0).RoundoffDomain(); + GpuArray rhi = Geom(0).ProbHiArrayInParticleReal(); + GpuArray rlo = Geom(0).ProbLoArrayInParticleReal(); for (int idim=0; idim < AMREX_SPACEDIM; ++idim) { - if (p.pos(idim) <= roundoff_domain.lo(idim)) { - p.pos(idim) = std::nextafter((ParticleReal) roundoff_domain.lo(idim), (ParticleReal) roundoff_domain.hi(idim)); + if (p.pos(idim) <= rlo[idim]) { + p.pos(idim) = std::nextafter(rlo[idim], rhi[idim]); } - if (p.pos(idim) >= roundoff_domain.hi(idim)) { - p.pos(idim) = std::nextafter((ParticleReal) roundoff_domain.hi(idim), (ParticleReal) roundoff_domain.lo(idim)); + if (p.pos(idim) >= rhi[idim]) { + p.pos(idim) = std::nextafter(rhi[idim], rlo[idim]); } } - AMREX_ASSERT(! Geom(0).outsideRoundoffDomain(AMREX_D_DECL(Real(p.pos(0)), Real(p.pos(1)), Real(p.pos(2))))); + AMREX_ASSERT(! Geom(0).outsideRoundoffDomain(AMREX_D_DECL(p.pos(0), p.pos(1), p.pos(2)))); } } @@ -517,8 +519,9 @@ Long ParticleContainer_impl::Num template class Allocator> -void -ParticleContainer_impl::ByteSpread () const +std::array +ParticleContainer_impl +::ByteSpread () const { Long cnt = 0; @@ -533,7 +536,7 @@ ParticleContainer_impl::ByteSpre Long mn = cnt, mx = mn; const int IOProc = ParallelContext::IOProcessorNumberSub(); - const std::size_t sz = sizeof(ParticleType)+NumRealComps()*sizeof(Real)+NumIntComps()*sizeof(int); + const Long sz = sizeof(ParticleType)+NumRealComps()*sizeof(ParticleReal)+NumIntComps()*sizeof(int); #ifdef AMREX_LAZY Lazy::QueueReduction( [=] () mutable { @@ -542,22 +545,27 @@ ParticleContainer_impl::ByteSpre ParallelReduce::Max(mx, IOProc, ParallelContext::CommunicatorSub()); ParallelReduce::Sum(cnt, IOProc, ParallelContext::CommunicatorSub()); - amrex::Print() << "ParticleContainer byte spread across MPI nodes: [" + amrex::Print() << "ParticleContainer spread across MPI nodes - bytes (num particles): [Min: " << mn*sz << " (" << mn << ")" - << " ... " + << ", Max: " << mx*sz << " (" << mx << ")" - << "] total particles: (" << cnt << ")\n"; + << ", Total: " + << cnt*sz + << " (" << cnt << ")]\n"; #ifdef AMREX_LAZY }); #endif + + return {mn*sz, mx*sz, cnt*sz}; } template class Allocator> -void -ParticleContainer_impl::PrintCapacity () const +std::array +ParticleContainer_impl +::PrintCapacity () const { Long cnt = 0; @@ -580,16 +588,18 @@ ParticleContainer_impl::PrintCap ParallelReduce::Max(mx, IOProc, ParallelContext::CommunicatorSub()); ParallelReduce::Sum(cnt, IOProc, ParallelContext::CommunicatorSub()); - amrex::Print() << "ParticleContainer byte spread across MPI nodes: [" + amrex::Print() << "ParticleContainer spread across MPI nodes - bytes: [Min: " << mn - << " (" << mn << ")" - << " ... 
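
The locateParticle hunk above also stops round-tripping positions through Real: the roundoff bounds are fetched directly in particle precision (the GpuArray declarations are presumably GpuArray<ParticleReal, AMREX_SPACEDIM>), and a position on or beyond a domain edge is nudged to the nearest representable value strictly inside the domain. A compilable sketch of that std::nextafter clamp, using float as a stand-in for a single-precision ParticleReal:

```cpp
#include <cassert>
#include <cmath>

// Move a position that landed on or past a domain edge to the closest
// floating-point value strictly inside [lo, hi), in the particle's own
// precision so the test and the clamp cannot disagree.
float clamp_inside (float pos, float lo, float hi)
{
    if (pos <= lo) { pos = std::nextafter(lo, hi); } // smallest value > lo
    if (pos >= hi) { pos = std::nextafter(hi, lo); } // largest value < hi
    return pos;
}

int main ()
{
    float lo = 0.0f, hi = 1.0f;
    float p = clamp_inside(1.0f, lo, hi);
    assert(p > lo && p < hi);
    (void) p;
}
```
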
" + << ", Max: " << mx - << " (" << mx << ")" - << "] total memory: (" << cnt << ")\n"; + << ", Total: " + << cnt + << "]\n"; #ifdef AMREX_LAZY }); #endif + + return {mn, mx, cnt}; } template ::Redistribute (int lev_min, int lev_max, int nGrow, int local, bool remove_negative) { + BL_PROFILE_SYNC_START_TIMED("SyncBeforeComms: Redist"); + #ifdef AMREX_USE_GPU if ( Gpu::inLaunchRegion() ) { @@ -1122,6 +1134,8 @@ ParticleContainer_impl #else RedistributeCPU(lev_min, lev_max, nGrow, local, remove_negative); #endif + + BL_PROFILE_SYNC_STOP(); } template ::SortPart for(MFIter mfi = MakeMFIter(lev); mfi.isValid(); ++mfi) { - auto& ptile = ParticlesAt(lev, mfi); - auto& aos = ptile.GetArrayOfStructs(); - const size_t np = aos.numParticles(); - auto pstruct_ptr = aos().dataPtr(); + auto& ptile = ParticlesAt(lev, mfi); + auto& aos = ptile.GetArrayOfStructs(); + auto pstruct_ptr = aos().dataPtr(); + const size_t np = aos.numParticles(); + const size_t np_total = np + aos.numNeighborParticles(); const Box& box = mfi.validbox(); @@ -1164,26 +1179,26 @@ ParticleContainer_impl::SortPart if (memEfficientSort) { { - ParticleVector tmp_particles(np); + ParticleVector tmp_particles(np_total); auto src = ptile.getParticleTileData(); ParticleType* dst = tmp_particles.data(); - AMREX_HOST_DEVICE_FOR_1D( np, i, + AMREX_HOST_DEVICE_FOR_1D( np_total, i, { - dst[i] = src.m_aos[inds[i]]; + dst[i] = i < np ? src.m_aos[inds[i]] : src.m_aos[i]; }); Gpu::streamSynchronize(); ptile.GetArrayOfStructs()().swap(tmp_particles); } - RealVector tmp_real(np); + RealVector tmp_real(np_total); for (int comp = 0; comp < NArrayReal + m_num_runtime_real; ++comp) { auto src = ptile.GetStructOfArrays().GetRealData(comp).data(); ParticleReal* dst = tmp_real.data(); - AMREX_HOST_DEVICE_FOR_1D( np, i, + AMREX_HOST_DEVICE_FOR_1D( np_total, i, { - dst[i] = src[inds[i]]; + dst[i] = i < np ? src[inds[i]] : src[i]; }); Gpu::streamSynchronize(); @@ -1191,13 +1206,13 @@ ParticleContainer_impl::SortPart ptile.GetStructOfArrays().GetRealData(comp).swap(tmp_real); } - IntVector tmp_int(np); + IntVector tmp_int(np_total); for (int comp = 0; comp < NArrayInt + m_num_runtime_int; ++comp) { auto src = ptile.GetStructOfArrays().GetIntData(comp).data(); int* dst = tmp_int.data(); - AMREX_HOST_DEVICE_FOR_1D( np, i, + AMREX_HOST_DEVICE_FOR_1D( np_total , i, { - dst[i] = src[inds[i]]; + dst[i] = i < np ? 
src[inds[i]] : src[i]; }); Gpu::streamSynchronize(); @@ -1207,8 +1222,11 @@ ParticleContainer_impl::SortPart } else { ParticleTileType ptile_tmp; ptile_tmp.define(m_num_runtime_real, m_num_runtime_int); - ptile_tmp.resize(np); + ptile_tmp.resize(np_total); + // copy re-ordered particles gatherParticles(ptile_tmp, ptile, np, m_bins.permutationPtr()); + // copy neighbor particles + amrex::copyParticles(ptile_tmp, ptile, np, np, np_total-np); ptile.swap(ptile_tmp); } } @@ -1271,7 +1289,8 @@ ParticleContainer_impl Vector > new_sizes(num_levels); const auto plo = Geom(0).ProbLoArray(); const auto phi = Geom(0).ProbHiArray(); - const auto rhi = Geom(0).RoundoffHiArray(); + const auto rlo = Geom(0).ProbLoArrayInParticleReal(); + const auto rhi = Geom(0).ProbHiArrayInParticleReal(); const auto is_per = Geom(0).isPeriodicArray(); for (int lev = lev_min; lev <= finest_lev_particles; ++lev) { @@ -1292,7 +1311,7 @@ ParticleContainer_impl "perhaps particles have not been initialized correctly?"); int num_stay = partitionParticlesByDest(src_tile, assign_grid, BufferMap(), - plo, phi, rhi, is_per, lev, gid, tid, + plo, phi, rlo, rhi, is_per, lev, gid, tid, lev_min, lev_max, nGrow, remove_negative); int num_move = np - num_stay; diff --git a/Src/Particle/AMReX_ParticleInit.H b/Src/Particle/AMReX_ParticleInit.H index 7aa2141c0b0..c21d0ea3da7 100644 --- a/Src/Particle/AMReX_ParticleInit.H +++ b/Src/Particle/AMReX_ParticleInit.H @@ -1022,8 +1022,6 @@ InitRandom (Long icount, ParticleLocData pld; - int cnt = 0; - Vector, Gpu::HostVector > > host_particles; host_particles.reserve(15); host_particles.resize(finestLevel()+1); @@ -1079,8 +1077,6 @@ InitRandom (Long icount, for (int i = 0; i < NArrayInt; i++) { host_int_attribs[pld.m_lev][ind][i].push_back(pdata.int_array_data[i]); } - - cnt++; } } diff --git a/Src/Particle/AMReX_ParticleUtil.H b/Src/Particle/AMReX_ParticleUtil.H index 7a4df446e56..e0ec8944361 100644 --- a/Src/Particle/AMReX_ParticleUtil.H +++ b/Src/Particle/AMReX_ParticleUtil.H @@ -556,7 +556,8 @@ AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE bool enforcePeriodic (P& p, amrex::GpuArray const& plo, amrex::GpuArray const& phi, - amrex::GpuArray const& rhi, + amrex::GpuArray const& rlo, + amrex::GpuArray const& rhi, amrex::GpuArray const& is_per) noexcept { bool shifted = false; @@ -568,7 +569,9 @@ bool enforcePeriodic (P& p, p.pos(idim) -= static_cast(phi[idim] - plo[idim]); } // clamp to avoid precision issues; - if (p.pos(idim) < plo[idim]) p.pos(idim) = static_cast(plo[idim]); + if (p.pos(idim) < rlo[idim]) { + p.pos(idim) = rlo[idim]; + } shifted = true; } else if (p.pos(idim) < plo[idim]) { @@ -576,8 +579,8 @@ bool enforcePeriodic (P& p, p.pos(idim) += static_cast(phi[idim] - plo[idim]); } // clamp to avoid precision issues; - if (p.pos(idim) >= rhi[idim]) { - p.pos(idim) = static_cast(rhi[idim]); + if (p.pos(idim) > rhi[idim]) { + p.pos(idim) = rhi[idim]; } shifted = true; } @@ -594,7 +597,8 @@ int partitionParticlesByDest (PTile& ptile, const PLocator& ploc, const ParticleBufferMap& pmap, const GpuArray& plo, const GpuArray& phi, - const GpuArray& rhi, + const GpuArray& rlo, + const GpuArray& rhi, const GpuArray& is_per, int lev, int gid, int /*tid*/, int lev_min, int lev_max, int nGrow, bool remove_negative) @@ -641,7 +645,7 @@ partitionParticlesByDest (PTile& ptile, const PLocator& ploc, const ParticleBuff else { auto p_prime = p; - enforcePeriodic(p_prime, plo, phi, rhi, is_per); + enforcePeriodic(p_prime, plo, phi, rlo, rhi, is_per); auto tup_prime = ploc(p_prime, lev_min, lev_max, nGrow); 
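
SortParticlesByBin above now sizes its temporaries to np_total = np + numNeighborParticles and permutes only the first np entries; the neighbor tail is copied through unchanged, either via the i < np test in the kernels or via amrex::copyParticles in the tile-swap branch, so swapping the buffers no longer drops the neighbors. A serial sketch of that gather:

```cpp
#include <cstddef>
#include <vector>

// Gather used when a tile also holds neighbor particles: only the first
// np ("real") entries are reordered by the sort permutation inds; the
// tail [np, src.size()) is copied through in place.
template <typename T>
std::vector<T> gather_with_tail (const std::vector<T>& src,
                                 const std::vector<std::size_t>& inds,
                                 std::size_t np)
{
    std::vector<T> dst(src.size());
    for (std::size_t i = 0; i < src.size(); ++i) {
        dst[i] = (i < np) ? src[inds[i]] : src[i];
    }
    return dst;
}
```
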
assigned_grid = amrex::get<0>(tup_prime); assigned_lev = amrex::get<1>(tup_prime); diff --git a/Src/Particle/AMReX_WriteBinaryParticleData.H b/Src/Particle/AMReX_WriteBinaryParticleData.H index 6c9494f88c5..b5c59174ae3 100644 --- a/Src/Particle/AMReX_WriteBinaryParticleData.H +++ b/Src/Particle/AMReX_WriteBinaryParticleData.H @@ -231,12 +231,26 @@ packIOData (Vector& idata, Vector& rdata, const PC& pc, int l } } + for (int j = 0; j < ptd.m_num_runtime_int; j++) { + if (write_int_comp_d_ptr[PC::SuperParticleType::NInt + j]) { + idata_d_ptr[iout_index] = ptd.m_runtime_idata[j][pindex]; + iout_index++; + } + } + for (int j = 0; j < PC::SuperParticleType::NReal; j++) { if (write_real_comp_d_ptr[j]) { rdata_d_ptr[rout_index] = p.rdata(j); rout_index++; } } + + for (int j = 0; j < ptd.m_num_runtime_real; j++) { + if (write_real_comp_d_ptr[PC::SuperParticleType::NReal + j]) { + rdata_d_ptr[rout_index] = ptd.m_runtime_rdata[j][pindex]; + rout_index++; + } + } } }); @@ -752,7 +766,25 @@ void WriteBinaryParticleDataAsync (PC const& pc, if (np_per_grid_local[lev][mfi.index()] > 0) { const auto& ptile = pc.ParticlesAt(lev, mfi); - new_ptile.resize(np_per_grid_local[lev][mfi.index()]); + + const auto np = np_per_grid_local[lev][mfi.index()]; + + new_ptile.resize(np); + + const auto runtime_real_comps = ptile.NumRuntimeRealComps(); + const auto runtime_int_comps = ptile.NumRuntimeIntComps(); + + constexpr auto NReal = NArrayReal + NStructReal; + constexpr auto NInt = NArrayInt + NStructInt; + + new_ptile.define(runtime_real_comps, runtime_int_comps); + + for (auto comp(0); comp < runtime_real_comps; ++comp) + new_ptile.push_back_real(NReal+comp, np, 0.); + + for (auto comp(0); comp < runtime_int_comps; ++comp) + new_ptile.push_back_int(NInt+comp, np, 0); + amrex::filterParticles(new_ptile, ptile, KeepValidFilter()); } } diff --git a/Tests/Amr/Advection_AmrCore/Source/AdvancePhiAllLevels.cpp b/Tests/Amr/Advection_AmrCore/Source/AdvancePhiAllLevels.cpp index b5e48e6e409..4f97cbf3184 100644 --- a/Tests/Amr/Advection_AmrCore/Source/AdvancePhiAllLevels.cpp +++ b/Tests/Amr/Advection_AmrCore/Source/AdvancePhiAllLevels.cpp @@ -35,7 +35,8 @@ AmrCoreAdv::AdvancePhiAllLevels (Real time, Real dt_lev, int /*iteration*/) // State with ghost cells MultiFab Sborder(grids[lev], dmap[lev], phi_new[lev].nComp(), num_grow); - FillPatch(lev, time, Sborder, 0, Sborder.nComp()); + FillPatch(lev, time, Sborder, 0, Sborder.nComp(), + FillPatchType::fillpatch_function); #ifdef AMREX_USE_OMP #pragma omp parallel if (Gpu::notInLaunchRegion()) diff --git a/Tests/Amr/Advection_AmrCore/Source/AdvancePhiAtLevel.cpp b/Tests/Amr/Advection_AmrCore/Source/AdvancePhiAtLevel.cpp index 3ddd055eda0..7a5e1abbaa7 100644 --- a/Tests/Amr/Advection_AmrCore/Source/AdvancePhiAtLevel.cpp +++ b/Tests/Amr/Advection_AmrCore/Source/AdvancePhiAtLevel.cpp @@ -33,7 +33,8 @@ AmrCoreAdv::AdvancePhiAtLevel (int lev, Real time, Real dt_lev, int /*iteration* // State with ghost cells MultiFab Sborder(grids[lev], dmap[lev], S_new.nComp(), num_grow); - FillPatch(lev, time, Sborder, 0, Sborder.nComp()); + FillPatch(lev, time, Sborder, 0, Sborder.nComp(), + FillPatchType::fillpatch_class); #ifdef AMREX_USE_OMP #pragma omp parallel if (Gpu::notInLaunchRegion()) diff --git a/Tests/Amr/Advection_AmrCore/Source/AmrCoreAdv.H b/Tests/Amr/Advection_AmrCore/Source/AmrCoreAdv.H index e330d30e740..1b6832d8663 100644 --- a/Tests/Amr/Advection_AmrCore/Source/AmrCoreAdv.H +++ b/Tests/Amr/Advection_AmrCore/Source/AmrCoreAdv.H @@ -10,6 +10,7 @@ #include #include #include +#include 
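
The AMReX_WriteBinaryParticleData.H hunks above extend particle I/O to runtime-added components: they are indexed after the compiled-in components, gated by the same write_*_comp flag arrays, and the async path now defines and zero-fills them on the temporary tile before filtering. A serial sketch of the masked packing layout (names hypothetical):

```cpp
#include <cstddef>
#include <vector>

// Fixed ("compiled-in") components come first, runtime components after,
// and a component is emitted only if its flag is set. Reader and writer
// share the flag array, so the output offsets stay consistent.
std::vector<double> pack_real (const std::vector<double>& fixed_comps,
                               const std::vector<double>& runtime_comps,
                               const std::vector<int>& write_flag)
{
    std::vector<double> out;
    std::size_t nfixed = fixed_comps.size();
    for (std::size_t j = 0; j < nfixed; ++j) {
        if (write_flag[j]) { out.push_back(fixed_comps[j]); }
    }
    for (std::size_t j = 0; j < runtime_comps.size(); ++j) {
        if (write_flag[nfixed + j]) { out.push_back(runtime_comps[j]); }
    }
    return out;
}
```
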
 #ifdef AMREX_USE_OMP
 #   include <omp.h>
@@ -98,15 +99,18 @@ private:
     // more flexible version of AverageDown() that lets you average down across multiple levels
     void AverageDownTo (int crse_lev);
 
+    enum class FillPatchType { fillpatch_class, fillpatch_function };
+
     // compute a new multifab by copying in phi from valid region and filling ghost cells
     // works for single level and 2-level cases (fill fine grid ghost by interpolating from coarse)
-    void FillPatch (int lev, amrex::Real time, amrex::MultiFab& mf, int icomp, int ncomp);
+    void FillPatch (int lev, amrex::Real time, amrex::MultiFab& mf, int icomp,
+                    int ncomp, FillPatchType fptype);
 
     // fill an entire multifab by interpolating from the coarser level
     // this comes into play when a new level of refinement appears
     void FillCoarsePatch (int lev, amrex::Real time, amrex::MultiFab& mf, int icomp, int ncomp);
 
-    // utility to copy in data from phi_old and/or phi_new into another multifab
+    // Pack pointers to phi_old and/or phi_new and associated times.
     void GetData (int lev, amrex::Real time, amrex::Vector<amrex::MultiFab*>& data,
                   amrex::Vector<amrex::Real>& datatime);
@@ -165,6 +169,9 @@ private:
     // used in the reflux operation
     amrex::Vector<std::unique_ptr<amrex::FluxRegister> > flux_reg;
 
+    // This is for fillpatch during timestepping, but not for regridding.
+    amrex::Vector<std::unique_ptr<amrex::FillPatcher<amrex::MultiFab>>> fillpatcher;
+
     // Velocity on all faces at all levels
     amrex::Vector< amrex::Array<amrex::MultiFab, AMREX_SPACEDIM> > facevel;
diff --git a/Tests/Amr/Advection_AmrCore/Source/AmrCoreAdv.cpp b/Tests/Amr/Advection_AmrCore/Source/AmrCoreAdv.cpp
index 62c9dc7417e..3300e4622cc 100644
--- a/Tests/Amr/Advection_AmrCore/Source/AmrCoreAdv.cpp
+++ b/Tests/Amr/Advection_AmrCore/Source/AmrCoreAdv.cpp
@@ -2,7 +2,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include
@@ -95,6 +94,10 @@ AmrCoreAdv::AmrCoreAdv ()
     // with the lev/lev-1 interface (and has grid spacing associated with lev-1)
     // therefore flux_reg[0] is never actually used in the reflux operation
     flux_reg.resize(nlevs_max+1);
+
+    // fillpatcher[lev] is for filling data on level lev using the data on
+    // lev-1 and lev.
+    fillpatcher.resize(nlevs_max+1);
 }
 
 AmrCoreAdv::~AmrCoreAdv ()
@@ -230,7 +233,8 @@ AmrCoreAdv::RemakeLevel (int lev, Real time, const BoxArray& ba,
     MultiFab new_state(ba, dm, ncomp, ng);
     MultiFab old_state(ba, dm, ncomp, ng);
 
-    FillPatch(lev, time, new_state, 0, ncomp);
+    // Must use fillpatch_function
+    FillPatch(lev, time, new_state, 0, ncomp, FillPatchType::fillpatch_function);
 
     std::swap(new_state, phi_new[lev]);
     std::swap(old_state, phi_old[lev]);
@@ -257,6 +261,7 @@ AmrCoreAdv::ClearLevel (int lev)
     phi_new[lev].clear();
     phi_old[lev].clear();
     flux_reg[lev].reset(nullptr);
+    fillpatcher[lev].reset(nullptr);
 }
 
 // Make a new level from scratch using provided BoxArray and DistributionMapping.
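
The fillpatcher member declared above is a cached helper: it is created lazily the first time a level is filled during timestepping, reused while the data it caches stay valid, and reset by the later hunks whenever the underlying level data change (after a coarse advance, an average-down, or a regrid). A sketch of that lifecycle with a stand-in for FillPatcher<MultiFab>:

```cpp
#include <memory>

// Stand-in for FillPatcher<MultiFab>: caches coarse data and stencils
// used to fill fine ghost cells between regrids.
struct LevelFillPatcher { /* cached coarse/fine data ... */ };

struct Hierarchy {
    std::unique_ptr<LevelFillPatcher> fillpatcher;

    LevelFillPatcher& get_fillpatcher () {
        if (!fillpatcher) {                 // lazy creation on first use
            fillpatcher = std::make_unique<LevelFillPatcher>();
        }
        return *fillpatcher;
    }
    void on_coarse_data_changed () {        // mirrors fillpatcher[lev].reset()
        fillpatcher.reset();
    }
};
```
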
@@ -418,7 +423,8 @@ AmrCoreAdv::AverageDownTo (int crse_lev) // compute a new multifab by coping in phi from valid region and filling ghost cells // works for single level and 2-level cases (fill fine grid ghost by interpolating from coarse) void -AmrCoreAdv::FillPatch (int lev, Real time, MultiFab& mf, int icomp, int ncomp) +AmrCoreAdv::FillPatch (int lev, Real time, MultiFab& mf, int icomp, int ncomp, + FillPatchType fptype) { if (lev == 0) { @@ -450,16 +456,31 @@ AmrCoreAdv::FillPatch (int lev, Real time, MultiFab& mf, int icomp, int ncomp) Interpolater* mapper = &cell_cons_interp; + if (fptype == FillPatchType::fillpatch_class) { + if (fillpatcher[lev] == nullptr) { + fillpatcher[lev] = std::make_unique> + (boxArray(lev ), DistributionMap(lev ), Geom(lev ), + boxArray(lev-1), DistributionMap(lev-1), Geom(lev-1), + mf.nGrowVect(), mf.nComp(), mapper); + } + } + if(Gpu::inLaunchRegion()) { GpuBndryFuncFab gpu_bndry_func(AmrCoreFill{}); PhysBCFunct > cphysbc(geom[lev-1],bcs,gpu_bndry_func); PhysBCFunct > fphysbc(geom[lev],bcs,gpu_bndry_func); - amrex::FillPatchTwoLevels(mf, time, cmf, ctime, fmf, ftime, - 0, icomp, ncomp, geom[lev-1], geom[lev], - cphysbc, 0, fphysbc, 0, refRatio(lev-1), - mapper, bcs, 0); + if (fptype == FillPatchType::fillpatch_class) { + fillpatcher[lev]->fill(mf, mf.nGrowVect(), time, + cmf, ctime, fmf, ftime, 0, icomp, ncomp, + cphysbc, 0, fphysbc, 0, bcs, 0); + } else { + amrex::FillPatchTwoLevels(mf, time, cmf, ctime, fmf, ftime, + 0, icomp, ncomp, geom[lev-1], geom[lev], + cphysbc, 0, fphysbc, 0, refRatio(lev-1), + mapper, bcs, 0); + } } else { @@ -467,10 +488,16 @@ AmrCoreAdv::FillPatch (int lev, Real time, MultiFab& mf, int icomp, int ncomp) PhysBCFunct cphysbc(geom[lev-1],bcs,bndry_func); PhysBCFunct fphysbc(geom[lev],bcs,bndry_func); - amrex::FillPatchTwoLevels(mf, time, cmf, ctime, fmf, ftime, - 0, icomp, ncomp, geom[lev-1], geom[lev], - cphysbc, 0, fphysbc, 0, refRatio(lev-1), - mapper, bcs, 0); + if (fptype == FillPatchType::fillpatch_class) { + fillpatcher[lev]->fill(mf, mf.nGrowVect(), time, + cmf, ctime, fmf, ftime, 0, icomp, ncomp, + cphysbc, 0, fphysbc, 0, bcs, 0); + } else { + amrex::FillPatchTwoLevels(mf, time, cmf, ctime, fmf, ftime, + 0, icomp, ncomp, geom[lev-1], geom[lev], + cphysbc, 0, fphysbc, 0, refRatio(lev-1), + mapper, bcs, 0); + } } } } @@ -513,21 +540,18 @@ AmrCoreAdv::FillCoarsePatch (int lev, Real time, MultiFab& mf, int icomp, int nc } } -// utility to copy in data from phi_old and/or phi_new into another multifab void AmrCoreAdv::GetData (int lev, Real time, Vector& data, Vector& datatime) { data.clear(); datatime.clear(); - const Real teps = (t_new[lev] - t_old[lev]) * 1.e-3; - - if (time > t_new[lev] - teps && time < t_new[lev] + teps) + if (amrex::almostEqual(time, t_new[lev], 5)) { data.push_back(&phi_new[lev]); datatime.push_back(t_new[lev]); } - else if (time > t_old[lev] - teps && time < t_old[lev] + teps) + else if (amrex::almostEqual(time, t_old[lev], 5)) { data.push_back(&phi_old[lev]); datatime.push_back(t_old[lev]); @@ -631,6 +655,8 @@ AmrCoreAdv::timeStepWithSubcycling (int lev, Real time, int iteration) } AverageDownTo(lev); // average lev+1 down to lev + + fillpatcher[lev+1].reset(); // Because the data on lev have changed. } @@ -694,6 +720,10 @@ AmrCoreAdv::timeStepNoSubcycling (Real time, int iteration) // Make sure the coarser levels are consistent with the finer levels AverageDown (); + for (auto& fp : fillpatcher) { + fp.reset(); // Because the data have changed. 
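
GetData above drops the hand-rolled teps window, which degenerates when t_new equals t_old (teps is then zero and even an exact time match fails the strict inequalities), in favor of amrex::almostEqual(time, t, 5), a units-in-the-last-place comparison. A sketch of the idea behind such a comparison; this illustrates the concept rather than reproducing the AMReX implementation:

```cpp
#include <algorithm>
#include <cmath>
#include <limits>

// Two times match if they differ by at most `ulp` units in the last
// place, scaled to the magnitude of the operands; near zero, fall back
// to an absolute test against the smallest normal value.
template <typename T>
bool almost_equal (T x, T y, int ulp = 5)
{
    T diff  = std::abs(x - y);
    T scale = std::max(std::abs(x), std::abs(y));
    return diff <= std::numeric_limits<T>::epsilon() * scale * ulp
        || diff <  std::numeric_limits<T>::min();
}
```

Unlike the old window, this test is scale-free: it works whether the simulation time is 1e-9 or 1e+9, and it never rejects an exactly equal time.
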
+ } + for (int lev = 0; lev <= finest_level; lev++) ++istep[lev]; diff --git a/Tests/Amr/Advection_AmrCore/Source/DefineVelocity.cpp b/Tests/Amr/Advection_AmrCore/Source/DefineVelocity.cpp index 995393e05f8..4dc1076dec8 100644 --- a/Tests/Amr/Advection_AmrCore/Source/DefineVelocity.cpp +++ b/Tests/Amr/Advection_AmrCore/Source/DefineVelocity.cpp @@ -39,7 +39,7 @@ AmrCoreAdv::DefineVelocityAtLevel (int lev, Real time) facevel[lev][2].array(mfi)) }; const Box& psibox = Box(IntVect(AMREX_D_DECL(std::min(ngbxx.smallEnd(0)-1, ngbxy.smallEnd(0)-1), - std::min(ngbxx.smallEnd(1)-1, ngbxy.smallEnd(0)-1), + std::min(ngbxx.smallEnd(1)-1, ngbxy.smallEnd(1)-1), 0)), IntVect(AMREX_D_DECL(std::max(ngbxx.bigEnd(0), ngbxy.bigEnd(0)+1), std::max(ngbxx.bigEnd(1)+1, ngbxy.bigEnd(1)), diff --git a/Tests/Amr/Advection_AmrCore/Source/Src_K/Make.package b/Tests/Amr/Advection_AmrCore/Source/Src_K/Make.package index e98f493727c..5254ff6f63f 100644 --- a/Tests/Amr/Advection_AmrCore/Source/Src_K/Make.package +++ b/Tests/Amr/Advection_AmrCore/Source/Src_K/Make.package @@ -1,3 +1,3 @@ CEXE_headers += Adv_K.H -CEXE_headers += compute_flux_K_$(DIM).H +CEXE_headers += compute_flux_$(DIM)D_K.H CEXE_headers += slope_K.H diff --git a/Tests/Amr/Advection_AmrLevel/Source/AmrLevelAdv.H b/Tests/Amr/Advection_AmrLevel/Source/AmrLevelAdv.H index 1e5bacbc497..faf56357e29 100644 --- a/Tests/Amr/Advection_AmrLevel/Source/AmrLevelAdv.H +++ b/Tests/Amr/Advection_AmrLevel/Source/AmrLevelAdv.H @@ -231,7 +231,7 @@ protected: /* * The data. */ - amrex::FluxRegister* flux_reg; + std::unique_ptr flux_reg; /* * Static data members. diff --git a/Tests/Amr/Advection_AmrLevel/Source/AmrLevelAdv.cpp b/Tests/Amr/Advection_AmrLevel/Source/AmrLevelAdv.cpp index db69749a85f..7fae3038f72 100644 --- a/Tests/Amr/Advection_AmrLevel/Source/AmrLevelAdv.cpp +++ b/Tests/Amr/Advection_AmrLevel/Source/AmrLevelAdv.cpp @@ -36,7 +36,6 @@ int AmrLevelAdv::do_tracers = 0; */ AmrLevelAdv::AmrLevelAdv () { - flux_reg = 0; } /** @@ -51,9 +50,9 @@ AmrLevelAdv::AmrLevelAdv (Amr& papa, : AmrLevel(papa,lev,level_geom,bl,dm,time) { - flux_reg = 0; - if (level > 0 && do_reflux) - flux_reg = new FluxRegister(grids,dmap,crse_ratio,level,NUM_STATE); + if (level > 0 && do_reflux) { + flux_reg = std::make_unique(grids,dmap,crse_ratio,level,NUM_STATE); + } } /** @@ -61,7 +60,6 @@ AmrLevelAdv::AmrLevelAdv (Amr& papa, */ AmrLevelAdv::~AmrLevelAdv () { - delete flux_reg; } /** @@ -74,9 +72,9 @@ AmrLevelAdv::restart (Amr& papa, { AmrLevel::restart(papa,is,bReadSpecial); - BL_ASSERT(flux_reg == 0); - if (level > 0 && do_reflux) - flux_reg = new FluxRegister(grids,dmap,crse_ratio,level,NUM_STATE); + if (level > 0 && do_reflux) { + flux_reg = std::make_unique(grids,dmap,crse_ratio,level,NUM_STATE); + } } /** @@ -88,11 +86,11 @@ AmrLevelAdv::checkPoint (const std::string& dir, VisMF::How how, bool dump_old) { - AmrLevel::checkPoint(dir, os, how, dump_old); + AmrLevel::checkPoint(dir, os, how, dump_old); #ifdef AMREX_PARTICLES - if (do_tracers && level == 0) { - TracerPC->WritePlotFile(dir, "Tracer"); - } + if (do_tracers && level == 0) { + TracerPC->WritePlotFile(dir, "Tracer"); + } #endif } @@ -285,7 +283,8 @@ AmrLevelAdv::advance (Real time, // State with ghost cells MultiFab Sborder(grids, dmap, NUM_STATE, NUM_GROW); - FillPatch(*this, Sborder, NUM_GROW, time, Phi_Type, 0, NUM_STATE); + // We use FillPatcher to do fillpatch here if we can + FillPatcherFill(Sborder, 0, NUM_STATE, NUM_GROW, time, Phi_Type, 0); // MF to hold the mac velocity MultiFab Umac[BL_SPACEDIM]; @@ -601,11 +600,19 
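
AmrLevelAdv above also converts flux_reg from a raw pointer to owned storage (the stripped declaration is presumably std::unique_ptr<amrex::FluxRegister>): the delete in the destructor and the null initializations disappear, and restart simply assigns a fresh std::make_unique result, releasing any previous object. The shape of that cleanup with a stand-in type:

```cpp
#include <memory>

struct Resource { /* stand-in for FluxRegister */ };

struct Level {
    std::unique_ptr<Resource> flux_reg;     // was: Resource* flux_reg = 0

    void init (bool needs_reflux) {
        if (needs_reflux) {
            flux_reg = std::make_unique<Resource>(); // replaces `new`
        }
    }
    // no user-defined destructor needed: unique_ptr frees automatically
};
```
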
@@ AmrLevelAdv::post_timestep (int iteration) // int finest_level = parent->finestLevel(); - if (do_reflux && level < finest_level) + if (do_reflux && level < finest_level) { reflux(); + } - if (level < finest_level) + if (level < finest_level) { avgDown(); + } + + if (level < finest_level) { + // fillpatcher on level+1 needs to be reset because data on this + // level have changed. + getLevel(level+1).resetFillPatcher(); + } #ifdef AMREX_PARTICLES if (TracerPC) diff --git a/Tests/CMakeLists.txt b/Tests/CMakeLists.txt index 50cc2bb8cb2..8d318f918b8 100644 --- a/Tests/CMakeLists.txt +++ b/Tests/CMakeLists.txt @@ -1,7 +1,7 @@ # # List of subdirectories to search for CMakeLists. # -set( AMREX_TESTS_SUBDIRS AsyncOut MultiBlock Amr CLZ Parser) +set( AMREX_TESTS_SUBDIRS AsyncOut MultiBlock Amr CLZ Parser CTOParFor) if (AMReX_PARTICLES) list(APPEND AMREX_TESTS_SUBDIRS Particles) diff --git a/Tests/CTOParFor/CMakeLists.txt b/Tests/CTOParFor/CMakeLists.txt new file mode 100644 index 00000000000..57c1e7715e2 --- /dev/null +++ b/Tests/CTOParFor/CMakeLists.txt @@ -0,0 +1,7 @@ +set(_sources main.cpp) +set(_input_files) + +setup_test(_sources _input_files) + +unset(_sources) +unset(_input_files) diff --git a/Tests/CTOParFor/GNUmakefile b/Tests/CTOParFor/GNUmakefile new file mode 100644 index 00000000000..0dbc65578af --- /dev/null +++ b/Tests/CTOParFor/GNUmakefile @@ -0,0 +1,20 @@ +AMREX_HOME = ../../ + +DEBUG = FALSE +DIM = 3 +COMP = gcc + +USE_MPI = FALSE +USE_OMP = FALSE +USE_CUDA = FALSE + +TINY_PROFILE = FALSE + +CXXSTD = c++17 + +include $(AMREX_HOME)/Tools/GNUMake/Make.defs + +include ./Make.package +include $(AMREX_HOME)/Src/Base/Make.package + +include $(AMREX_HOME)/Tools/GNUMake/Make.rules diff --git a/Tests/CTOParFor/Make.package b/Tests/CTOParFor/Make.package new file mode 100644 index 00000000000..4497b0e25b9 --- /dev/null +++ b/Tests/CTOParFor/Make.package @@ -0,0 +1,4 @@ +CEXE_sources += main.cpp + + + diff --git a/Tests/CTOParFor/main.cpp b/Tests/CTOParFor/main.cpp new file mode 100644 index 00000000000..0cf1d7ea35a --- /dev/null +++ b/Tests/CTOParFor/main.cpp @@ -0,0 +1,64 @@ +#include +#include + +using namespace amrex; + +int main (int argc, char* argv[]) +{ + amrex::Initialize(argc,argv); +#if (__cplusplus >= 201703L) + { + enum A_options: int { + A0 = 0, A1 + }; + + enum B_options: int { + B0 = 0, B1, B2 + }; + + Box box(IntVect(0),IntVect(7)); + IArrayBox fab(box,2); + fab.setVal(-10); + + auto const& arr = fab.array(); + + for (int ia = 0; ia < 2; ++ia) { + for (int ib = 0; ib < 3; ++ib) { + ParallelFor(TypeList, + CompileTimeOptions>{}, + {ia, ib}, + box, [=] AMREX_GPU_DEVICE (int i, int j, int k, + auto A_control, + auto B_control) + { + auto const& larr = arr; + int a, b; + if constexpr (A_control.value == 0) { + a = 0; + } else if constexpr (A_control.value == 1) { + a = 1; + } else { + a = -1; + } + if constexpr (B_control.value == 0) { + b = 0; + } else if constexpr (B_control.value == 1) { + b = 1; + } else if constexpr (B_control.value == 2) { + b = 2; + } else if constexpr (B_control.value == 3) { + b = 3; + } + larr(i,j,k) = a*10 + b; + }); + + auto s = fab.sum(0); + AMREX_ALWAYS_ASSERT(s == box.numPts()*(ia*10+ib)); + } + } + } +#else + amrex::Print() << "This test requires C++17." 
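
The new CTOParFor test drives the compile-time-options ParallelFor overload: a TypeList of CompileTimeOptions (presumably TypeList<CompileTimeOptions<A0,A1>, CompileTimeOptions<B0,B1,B2>>) plus the runtime pair {ia, ib} select one kernel instantiation, inside which A_control.value and B_control.value are constant expressions, so the if constexpr branches compile away rather than being tested per cell. A pure-C++17 sketch of the underlying dispatch, with a hypothetical dispatch helper:

```cpp
#include <cassert>
#include <utility>

// Map a runtime option value onto a compile-time constant: f is invoked
// with std::integral_constant<int, A> for whichever A matches `a`, so the
// body may branch with `if constexpr`. The AMReX overload generalizes
// this to several option lists and fuses it with the kernel launch.
template <typename F, int... As>
void dispatch (int a, F&& f, std::integer_sequence<int, As...>)
{
    ( (a == As ? (f(std::integral_constant<int, As>{}), void()) : void()), ... );
}

int main ()
{
    for (int ia = 0; ia < 2; ++ia) {
        int result = -1;
        dispatch(ia, [&] (auto A_control) {
            if constexpr (A_control.value == 0) { result = 10; }
            else                                { result = 20; }
        }, std::integer_sequence<int, 0, 1>{});
        assert(result == (ia == 0 ? 10 : 20));
    }
    return 0;
}
```
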
<< std::endl; +#endif + amrex::Finalize(); +} diff --git a/Tests/EB/CNS/Source/main.cpp b/Tests/EB/CNS/Source/main.cpp index aa851c47956..393431f8f79 100644 --- a/Tests/EB/CNS/Source/main.cpp +++ b/Tests/EB/CNS/Source/main.cpp @@ -53,7 +53,13 @@ int main (int argc, char* argv[]) AmrLevel::SetEBSupportLevel(EBSupport::full); AmrLevel::SetEBMaxGrowCells(CNS::numGrow(),4,2); - initialize_EB2(amr.Geom(amr.maxLevel()), amr.maxLevel(), amr.maxLevel()); + int max_eb_level = amr.maxLevel(); + ParmParse pp("amr"); + pp.query("max_eb_level", max_eb_level); + initialize_EB2(amr.Geom(max_eb_level), max_eb_level, max_eb_level); + if (max_eb_level < amr.maxLevel()) { + EB2::addFineLevels(amr.maxLevel() - max_eb_level); + } amr.init(strt_time,stop_time); diff --git a/Tests/GPU/CNS/Source/CNS.H b/Tests/GPU/CNS/Source/CNS.H index 877f0b523da..eedb7d486ba 100644 --- a/Tests/GPU/CNS/Source/CNS.H +++ b/Tests/GPU/CNS/Source/CNS.H @@ -157,6 +157,8 @@ protected: static int do_reflux; + static int rk_order; + static bool do_visc; static bool use_const_visc; diff --git a/Tests/GPU/CNS/Source/CNS.cpp b/Tests/GPU/CNS/Source/CNS.cpp index c3b5e2fb600..1a073c68c8a 100644 --- a/Tests/GPU/CNS/Source/CNS.cpp +++ b/Tests/GPU/CNS/Source/CNS.cpp @@ -19,6 +19,7 @@ int CNS::verbose = 0; IntVect CNS::hydro_tile_size {AMREX_D_DECL(1024,16,16)}; Real CNS::cfl = 0.3; int CNS::do_reflux = 1; +int CNS::rk_order = 2; int CNS::refine_max_dengrad_lev = -1; Real CNS::refine_dengrad = 1.0e10; @@ -241,6 +242,9 @@ CNS::post_timestep (int /*iteration*/) if (level < parent->finestLevel()) { avgDown(); + // fillpatcher on level+1 needs to be reset because data on this + // level have changed. + getLevel(level+1).resetFillPatcher(); } } @@ -354,6 +358,7 @@ CNS::read_params () } pp.query("do_reflux", do_reflux); + pp.query("rk_order", rk_order); pp.query("do_visc", do_visc); diff --git a/Tests/GPU/CNS/Source/CNS_advance.cpp b/Tests/GPU/CNS/Source/CNS_advance.cpp index c086cac0e9f..99749dded19 100644 --- a/Tests/GPU/CNS/Source/CNS_advance.cpp +++ b/Tests/GPU/CNS/Source/CNS_advance.cpp @@ -7,7 +7,7 @@ using namespace amrex; Real -CNS::advance (Real time, Real dt, int /*iteration*/, int /*ncycle*/) +CNS::advance (Real time, Real dt, int iteration, int ncycle) { BL_PROFILE("CNS::advance()"); @@ -16,11 +16,6 @@ CNS::advance (Real time, Real dt, int /*iteration*/, int /*ncycle*/) state[i].swapTimeLevels(dt); } - MultiFab& S_new = get_new_data(State_Type); - MultiFab& S_old = get_old_data(State_Type); - MultiFab dSdt(grids,dmap,NUM_STATE,0,MFInfo(),Factory()); - MultiFab Sborder(grids,dmap,NUM_STATE,NUM_GROW,MFInfo(),Factory()); - FluxRegister* fr_as_crse = nullptr; if (do_reflux && level < parent->finestLevel()) { CNS& fine_level = getLevel(level+1); @@ -36,23 +31,14 @@ CNS::advance (Real time, Real dt, int /*iteration*/, int /*ncycle*/) fr_as_crse->setVal(Real(0.0)); } - // RK2 stage 1 - FillPatch(*this, Sborder, NUM_GROW, time, State_Type, 0, NUM_STATE); - compute_dSdt(Sborder, dSdt, Real(0.5)*dt, fr_as_crse, fr_as_fine); - // U^* = U^n + dt*dUdt^n - MultiFab::LinComb(S_new, Real(1.0), Sborder, 0, dt, dSdt, 0, 0, NUM_STATE, 0); - computeTemp(S_new,0); - - // RK2 stage 2 - // After fillpatch Sborder = U^n+dt*dUdt^n - FillPatch(*this, Sborder, NUM_GROW, time+dt, State_Type, 0, NUM_STATE); - compute_dSdt(Sborder, dSdt, Real(0.5)*dt, fr_as_crse, fr_as_fine); - // S_new = 0.5*(Sborder+S_old) = U^n + 0.5*dt*dUdt^n - MultiFab::LinComb(S_new, Real(0.5), Sborder, 0, Real(0.5), S_old, 0, 0, NUM_STATE, 0); - // S_new += 0.5*dt*dSdt - MultiFab::Saxpy(S_new, 
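
The Tests/EB/CNS change above makes the finest EB-geometry level a runtime knob: amr.max_eb_level is read with ParmParse::query, which leaves the default untouched when the key is absent from the inputs, and EB2::addFineLevels generates the remaining levels afterwards. The read-with-default pattern in isolation:

```cpp
#include <AMReX_ParmParse.H>

// Returns the finest level on which to build the EB geometry. query()
// only overwrites max_eb_level when "amr.max_eb_level" appears in the
// inputs file, so the AMR max level stays the default.
int read_max_eb_level (int amr_max_level)
{
    int max_eb_level = amr_max_level;
    amrex::ParmParse pp("amr");
    pp.query("max_eb_level", max_eb_level);
    return max_eb_level;
}
```
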
Real(0.5)*dt, dSdt, 0, 0, NUM_STATE, 0); - // We now have S_new = U^{n+1} = (U^n+0.5*dt*dUdt^n) + 0.5*dt*dUdt^* - computeTemp(S_new,0); + RK(rk_order, State_Type, time, dt, iteration, ncycle, + // Given state S, compute dSdt. dtsub is needed for flux register operations + [&] (int /*stage*/, MultiFab& dSdt, MultiFab const& S, + Real /*t*/, Real dtsub) { + compute_dSdt(S, dSdt, dtsub, fr_as_crse, fr_as_fine); + }, + // Optional. In case if there is anything needed after each RK substep. + [&] (int /*stage*/, MultiFab& S) { computeTemp(S,0); }); return dt; } @@ -254,5 +240,3 @@ CNS::compute_dSdt (const MultiFab& S, MultiFab& dSdt, Real dt, } } } - - diff --git a/Tests/GPU/CNS/Source/diffusion/CNS_diffusion_K.H b/Tests/GPU/CNS/Source/diffusion/CNS_diffusion_K.H index b9bf5a18f78..75f4f784fad 100644 --- a/Tests/GPU/CNS/Source/diffusion/CNS_diffusion_K.H +++ b/Tests/GPU/CNS/Source/diffusion/CNS_diffusion_K.H @@ -17,24 +17,24 @@ cns_diffcoef (int i, int j, int k, { using amrex::Real; - coefs(i,j,k,CETA) = parm.C_S * std::sqrt(q(i,j,k,QTEMP)) * q(i,j,k,QTEMP) / (q(i,j,k,QTEMP)+parm.T_S); - coefs(i,j,k,CXI) = Real(0.0); - coefs(i,j,k,CLAM) = coefs(i,j,k,CETA)*parm.cp/parm.Pr; + coefs(i,j,k,CETA) = parm.C_S * std::sqrt(q(i,j,k,QTEMP)) * q(i,j,k,QTEMP) / (q(i,j,k,QTEMP)+parm.T_S); + coefs(i,j,k,CXI) = Real(0.0); + coefs(i,j,k,CLAM) = coefs(i,j,k,CETA)*parm.cp/parm.Pr; } AMREX_GPU_DEVICE inline void cns_constcoef (int i, int j, int k, - amrex::Array4 const& q, + amrex::Array4 const& /*q*/, amrex::Array4 const& coefs, Parm const& parm) noexcept { using amrex::Real; - coefs(i,j,k,CETA) = parm.const_visc_mu; - coefs(i,j,k,CXI) = parm.const_visc_ki; - coefs(i,j,k,CLAM) = parm.const_lambda; + coefs(i,j,k,CETA) = parm.const_visc_mu; + coefs(i,j,k,CXI) = parm.const_visc_ki; + coefs(i,j,k,CLAM) = parm.const_lambda; } AMREX_GPU_DEVICE @@ -45,7 +45,7 @@ cns_diff_x (int i, int j, int k, amrex::Array4 const& coeffs, amrex::GpuArray const& dxinv, amrex::Array4 const& fx, - Parm const& parm) noexcept + Parm const& /*parm*/) noexcept { using amrex::Real; @@ -81,7 +81,7 @@ cns_diff_y (int i, int j, int k, amrex::Array4 const& q, amrex::Array4 const& coeffs, amrex::GpuArray const& dxinv, amrex::Array4 const& fy, - Parm const& parm) noexcept + Parm const& /*parm*/) noexcept { using amrex::Real; @@ -119,7 +119,7 @@ cns_diff_z (int i, int j, int k, amrex::Array4 const& coeffs, amrex::GpuArray const& dxinv, amrex::Array4 const& fz, - Parm const& parm) noexcept + Parm const& /*parm*/) noexcept { using amrex::Real; diff --git a/Tests/LinearSolvers/CellEB2/inputs.rt.2d b/Tests/LinearSolvers/CellEB2/inputs.rt.2d index 8dfd8a7bb3f..4afdf526259 100644 --- a/Tests/LinearSolvers/CellEB2/inputs.rt.2d +++ b/Tests/LinearSolvers/CellEB2/inputs.rt.2d @@ -11,6 +11,7 @@ max_level = 1 n_cell = 128 max_grid_size = 64 eb2.max_grid_size = 32 +eb2.num_coarsen_opt=3 eb2.geom_type = sphere eb2.sphere_center = 0.5 0.5 0.5 diff --git a/Tests/LinearSolvers/CellEB2/inputs.rt.3d b/Tests/LinearSolvers/CellEB2/inputs.rt.3d index 9a8037a68c0..64fcef6281b 100644 --- a/Tests/LinearSolvers/CellEB2/inputs.rt.3d +++ b/Tests/LinearSolvers/CellEB2/inputs.rt.3d @@ -11,6 +11,7 @@ max_level = 1 n_cell = 128 max_grid_size = 64 eb2.max_grid_size = 32 +eb2.num_coarsen_opt=3 eb2.geom_type = sphere eb2.sphere_center = 0.5 0.5 0.5 diff --git a/Tools/AMRProfParser/GNUmakefile b/Tools/AMRProfParser/GNUmakefile index 619d67a557a..59fd2a54b0c 100644 --- a/Tools/AMRProfParser/GNUmakefile +++ b/Tools/AMRProfParser/GNUmakefile @@ -23,7 +23,6 @@ USE_MPI = FALSE USE_OMP = 
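
CNS::advance above now hands stage bookkeeping to an RK driver parameterized by rk_order and two callbacks: one computes dSdt (dtsub is forwarded for the flux-register scaling) and an optional post-stage hook applies computeTemp. For rk_order == 2 this reproduces the Heun-type scheme the removed lines spelled out by hand; a standalone sketch of that scheme with plain vectors standing in for MultiFabs:

```cpp
#include <cstddef>
#include <functional>
#include <vector>

using State = std::vector<double>;

// Lambda-driven RK2 (Heun): the driver owns the stage arithmetic and
// calls back into user code for the RHS and the per-stage fixup.
void rk2 (State& S, double t, double dt,
          const std::function<void(State&, const State&, double)>& dSdt,
          const std::function<void(State&)>& post_stage)
{
    State S_old = S, k(S.size());
    dSdt(k, S_old, t);                          // stage 1: k1 = f(U^n)
    for (std::size_t i = 0; i < S.size(); ++i) {
        S[i] = S_old[i] + dt * k[i];            // U* = U^n + dt*k1
    }
    post_stage(S);                              // computeTemp in CNS
    dSdt(k, S, t + dt);                         // stage 2: k2 = f(U*)
    for (std::size_t i = 0; i < S.size(); ++i) {
        S[i] = 0.5 * (S_old[i] + S[i]) + 0.5 * dt * k[i];
        // = U^n + 0.5*dt*(k1 + k2), the average the removed code computed
    }
    post_stage(S);
}
```

Pulling the stages into a driver is what lets rk_order become an inputs-file parameter instead of a property hard-wired into each advance() implementation.
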
FALSE EBASE = amrprofparser BL_NO_FORT = FALSE -USE_CXX11 = TRUE include $(AMREX_HOME)/Tools/GNUMake/Make.defs include $(AMREX_HOME)/Src/Base/Make.package diff --git a/Tools/Backtrace/parse_bt.py b/Tools/Backtrace/parse_bt.py index ce4a6684911..dd0234f9120 100755 --- a/Tools/Backtrace/parse_bt.py +++ b/Tools/Backtrace/parse_bt.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 import sys import re diff --git a/Tools/CMake/AMReXConfig.cmake.in b/Tools/CMake/AMReXConfig.cmake.in index 6b0cdd3fd74..64a112da181 100644 --- a/Tools/CMake/AMReXConfig.cmake.in +++ b/Tools/CMake/AMReXConfig.cmake.in @@ -223,10 +223,12 @@ endif () # CUDA # # AMReX 21.06+ supports CUDA_ARCHITECTURES -if(CMAKE_VERSION VERSION_LESS 3.20) - if (@AMReX_CUDA@) - include(AMReX_SetupCUDA) - endif () +if (@AMReX_CUDA@) + if (CMAKE_VERSION VERSION_LESS 3.20) + include(AMReX_SetupCUDA) + else () + find_dependency(CUDAToolkit REQUIRED) + endif () endif () include( "${CMAKE_CURRENT_LIST_DIR}/AMReXTargets.cmake" ) diff --git a/Tools/CMake/AMReXFlagsTargets.cmake b/Tools/CMake/AMReXFlagsTargets.cmake index 64dcf3f3a5f..2e89c32fddc 100644 --- a/Tools/CMake/AMReXFlagsTargets.cmake +++ b/Tools/CMake/AMReXFlagsTargets.cmake @@ -82,15 +82,15 @@ target_compile_options( Flags_CXX $<${_cxx_cray_dbg}:-O0> $<${_cxx_cray_rwdbg}:> $<${_cxx_cray_rel}:> - $<${_cxx_clang_dbg}:-O0 -Wall -Wextra -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wno-pass-failed> - $<${_cxx_clang_rwdbg}:-Wno-pass-failed> - $<${_cxx_clang_rel}:-Wno-pass-failed> - $<${_cxx_appleclang_dbg}:-O0 -Wall -Wextra -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wno-pass-failed> - $<${_cxx_appleclang_rwdbg}:-Wno-pass-failed> - $<${_cxx_appleclang_rel}:-Wno-pass-failed> - $<${_cxx_intelllvm_dbg}:-O0 -Wall -Wextra -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wno-pass-failed> - $<${_cxx_intelllvm_rwdbg}:-Wno-pass-failed> - $<${_cxx_intelllvm_rel}:-Wno-pass-failed> + $<${_cxx_clang_dbg}:-O0 -Wall -Wextra -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable> + $<${_cxx_clang_rwdbg}:> + $<${_cxx_clang_rel}:> + $<${_cxx_appleclang_dbg}:-O0 -Wall -Wextra -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable> + $<${_cxx_appleclang_rwdbg}:> + $<${_cxx_appleclang_rel}:> + $<${_cxx_intelllvm_dbg}:-O0 -Wall -Wextra -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable> + $<${_cxx_intelllvm_rwdbg}:> + $<${_cxx_intelllvm_rel}:> ) # diff --git a/Tools/CMake/AMReXParallelBackends.cmake b/Tools/CMake/AMReXParallelBackends.cmake index ebf397266f8..61b563f7c51 100644 --- a/Tools/CMake/AMReXParallelBackends.cmake +++ b/Tools/CMake/AMReXParallelBackends.cmake @@ -198,10 +198,12 @@ if (AMReX_HIP) unset(_valid_hip_compilers) if(NOT DEFINED HIP_PATH) - if(NOT DEFINED ENV{HIP_PATH}) - set(HIP_PATH "/opt/rocm/hip" CACHE PATH "Path to which HIP has been installed") - else() + if(DEFINED ENV{HIP_PATH}) set(HIP_PATH $ENV{HIP_PATH} CACHE PATH "Path to which HIP has been installed") + elseif(DEFINED ENV{ROCM_PATH}) + set(HIP_PATH "$ENV{ROCM_PATH}/hip" CACHE PATH "Path to which HIP has been installed") + else() + set(HIP_PATH "/opt/rocm/hip" CACHE PATH "Path to which HIP has been installed") endif() endif() @@ -255,9 +257,15 @@ if (AMReX_HIP) if(AMReX_ROCTX) # To be modernized in the future, please see: # https://github.com/ROCm-Developer-Tools/roctracer/issues/56 - target_include_directories(amrex PUBLIC ${HIP_PATH}/../roctracer/include ${HIP_PATH}/../rocprofiler/include) - target_link_libraries(amrex PUBLIC 
"-L${HIP_PATH}/../roctracer/lib/ -lroctracer64" "-L${HIP_PATH}/../roctracer/lib -lroctx64") - endif () + target_include_directories(amrex SYSTEM PUBLIC + ${HIP_PATH}/../roctracer/include + ${HIP_PATH}/../rocprofiler/include + ) + target_link_libraries(amrex PUBLIC + "-L${HIP_PATH}/../roctracer/lib -lroctracer64" + "-L${HIP_PATH}/../roctracer/lib -lroctx64" + ) + endif() target_link_libraries(amrex PUBLIC hip::hiprand roc::rocrand roc::rocprim) # avoid forcing the rocm LLVM flags on a gfortran @@ -271,7 +279,7 @@ if (AMReX_HIP) # else there will be a runtime issue (cannot find # missing gpu devices) target_compile_options(amrex PUBLIC - $<$:--amdgpu-target=${AMReX_AMD_ARCH_HIPCC} -Wno-pass-failed>) + $<$:--amdgpu-target=${AMReX_AMD_ARCH_HIPCC}>) endif() target_compile_options(amrex PUBLIC $<$:-m64>) diff --git a/Tools/CMake/AMReXSYCL.cmake b/Tools/CMake/AMReXSYCL.cmake index 8e6c7f2f4d5..007b5f321fe 100644 --- a/Tools/CMake/AMReXSYCL.cmake +++ b/Tools/CMake/AMReXSYCL.cmake @@ -45,7 +45,7 @@ target_compile_features(SYCL INTERFACE cxx_std_17) # target_compile_options( SYCL INTERFACE - $<${_cxx_dpcpp}:-Wno-error=sycl-strict -Wno-pass-failed -fsycl> + $<${_cxx_dpcpp}:-Wno-error=sycl-strict -fsycl> $<${_cxx_dpcpp}:$<$:-fsycl-device-code-split=per_kernel>>) # temporary work-around for DPC++ beta08 bug diff --git a/Tools/CMake/AMReXThirdPartyLibraries.cmake b/Tools/CMake/AMReXThirdPartyLibraries.cmake index 1afbcac4ee2..2b0a90febe1 100644 --- a/Tools/CMake/AMReXThirdPartyLibraries.cmake +++ b/Tools/CMake/AMReXThirdPartyLibraries.cmake @@ -45,7 +45,7 @@ endif () # Sensei # if (AMReX_SENSEI) - find_package(SENSEI REQUIRED) + find_package( SENSEI 4.0.0 REQUIRED ) target_link_libraries( amrex PUBLIC sensei ) endif () diff --git a/Tools/CMake/AMReXTypecheck.cmake b/Tools/CMake/AMReXTypecheck.cmake index 926fcda9daf..0b68fb8c274 100644 --- a/Tools/CMake/AMReXTypecheck.cmake +++ b/Tools/CMake/AMReXTypecheck.cmake @@ -250,7 +250,7 @@ function( add_typecheck_target _target) add_custom_command( OUTPUT ${_cppd_file} COMMAND ${CMAKE_C_COMPILER} - ARGS ${_cxx_defines} ${_includes} -E -P -x c -std=c99 ${_fullname} > ${_cppd_file} + ARGS ${_cxx_defines} ${_includes} -E -P -x c -std=c11 ${_fullname} > ${_cppd_file} COMMAND sed ARGS -i -e 's/amrex::Real/${AMREX_REAL}/g' ${_cppd_file} COMMAND sed diff --git a/Tools/CMake/AMReX_Config.cmake b/Tools/CMake/AMReX_Config.cmake index 1754b339094..c842db1e136 100644 --- a/Tools/CMake/AMReX_Config.cmake +++ b/Tools/CMake/AMReX_Config.cmake @@ -37,22 +37,18 @@ function (configure_amrex) # # Setup compilers # - # Set C++ standard and disable compiler-specific extensions, like "-std=gnu++14" for GNU + # Set C++ standard and disable compiler-specific extensions, like "-std=gnu++17" for GNU # This will also enforce the same standard with the CUDA compiler # Moreover, it will also enforce such standard on all the consuming targets # set_target_properties(amrex PROPERTIES CXX_EXTENSIONS OFF) - # minimum: C++14 on Linux, C++17 on Windows, C++17 for dpc++ and hip - if (AMReX_DPCPP OR AMReX_HIP) - target_compile_features(amrex PUBLIC cxx_std_17) - else () - target_compile_features(amrex PUBLIC $,Windows>,cxx_std_17,cxx_std_14>) - endif () + # minimum: C++17 + target_compile_features(amrex PUBLIC cxx_std_17) if (AMReX_CUDA) set_target_properties(amrex PROPERTIES CUDA_EXTENSIONS OFF) - # minimum: C++14 on Linux, C++17 on Windows - target_compile_features(amrex PUBLIC $,Windows>,cuda_std_17,cuda_std_14>) + # minimum: C++17 + target_compile_features(amrex PUBLIC cuda_std_17) endif() # 
diff --git a/Tools/C_scripts/describe_sources.py b/Tools/C_scripts/describe_sources.py index c49d16694a9..97cfe5e1e1c 100755 --- a/Tools/C_scripts/describe_sources.py +++ b/Tools/C_scripts/describe_sources.py @@ -1,10 +1,6 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 import sys - -if sys.version_info < (2, 7): - sys.exit("ERROR: need python 2.7 or later for dep.py") - import argparse import os import subprocess diff --git a/Tools/C_scripts/gatherbuildtime.py b/Tools/C_scripts/gatherbuildtime.py index 082ec766c28..b0b1740847f 100755 --- a/Tools/C_scripts/gatherbuildtime.py +++ b/Tools/C_scripts/gatherbuildtime.py @@ -1,11 +1,7 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 -from __future__ import print_function import sys, os, glob, operator, time -if sys.version_info < (2, 7): - sys.exit("ERROR: need python 2.7 or later for dep.py") - if __name__ == "__main__": dt = float(sys.argv[3])-float(sys.argv[2]) hours, rem = divmod(dt, 3600) diff --git a/Tools/C_scripts/makebuildinfo_C.py b/Tools/C_scripts/makebuildinfo_C.py index 8a05cd3f75d..07f31c0585a 100755 --- a/Tools/C_scripts/makebuildinfo_C.py +++ b/Tools/C_scripts/makebuildinfo_C.py @@ -186,11 +186,11 @@ def runcommand(command): out = p.stdout.read() return out.strip().decode("ascii") -def get_git_hash(d): +def get_git_hash(d, git_style): cwd = os.getcwd() os.chdir(d) try: - ghash = runcommand("git describe --always --tags --dirty") + ghash = runcommand("git describe " + git_style) except: ghash = "" os.chdir(cwd) @@ -259,6 +259,10 @@ def get_git_hash(d): help="the full path to the build directory that corresponds to build_git_name", type=str, default="") + parser.add_argument("--GIT_STYLE", + help="style options for the 'git describe' command used to construct hash strings", + type=str, default="--always --tags --dirty") + # parse and convert to a dictionary args = parser.parse_args() @@ -281,7 +285,7 @@ def get_git_hash(d): git_hashes = [] for d in GIT: if d and os.path.isdir(d): - git_hashes.append(get_git_hash(d)) + git_hashes.append(get_git_hash(d, args.GIT_STYLE)) else: git_hashes.append("") @@ -291,7 +295,7 @@ def get_git_hash(d): except: build_git_hash = "directory not valid" else: - build_git_hash = get_git_hash(args.build_git_dir) + build_git_hash = get_git_hash(args.build_git_dir, args.GIT_STYLE) os.chdir(running_dir) else: build_git_hash = "" diff --git a/Tools/CompileTesting/compiletesting.py b/Tools/CompileTesting/compiletesting.py index 129e83ca960..9cb5f59bac5 100755 --- a/Tools/CompileTesting/compiletesting.py +++ b/Tools/CompileTesting/compiletesting.py @@ -1,6 +1,5 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 -from __future__ import print_function import sys import os import shlex @@ -148,4 +147,3 @@ def run(command, outfile=None): if __name__ == "__main__": compiletesting(sys.argv[1:]) - diff --git a/Tools/F_scripts/dep.py b/Tools/F_scripts/dep.py index 894dcdb65e6..24bd8318fb8 100755 --- a/Tools/F_scripts/dep.py +++ b/Tools/F_scripts/dep.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # automatically generate Makefile dependencies for Fortran 90 source. # @@ -20,18 +20,7 @@ # (e.g. iso_c_binding). 
Add any system-provided modules to the # `IGNORES` list below -from __future__ import print_function - import sys - -if sys.version_info < (2, 7): - sys.exit("ERROR: need python 2.7 or later for dep.py") - -if sys.version[0] == "2": - reload(sys) - sys.setdefaultencoding('latin-1') - - import io import re import os diff --git a/Tools/F_scripts/f90doc/README b/Tools/F_scripts/f90doc/README deleted file mode 100644 index 6edb2de011f..00000000000 --- a/Tools/F_scripts/f90doc/README +++ /dev/null @@ -1,36 +0,0 @@ -This is f90doc version 0.3.4, a documentation tool for Fortran 90. For -more information (e.g., documentation), see - - http://theory.lcs.mit.edu/~edemaine/f90doc - -or contact Erik Demaine (edemaine@mit.edu). Comments, suggestions, -criticisms, and bug reports go to this e-mail address. If you modify f90doc or -use it in a serious way, please contact me (I'd be interested). - -COPYRIGHT - -f90doc is freeware. If you use it in a research or commercial project, you -must acknowledge the software and its author. I would also appreciate it if -you contact me -- I'd like to know how f90doc is used. If you base code on -f90doc, you must acknowledge this. Again, please let me know if you think your -changes would be at all useful to the rest of the world (even if you are not -willing to share it, the ideas may be useful). - -This information must accompany any copy of f90doc. - -INSTALLATION - -You shouldn't have to compile anything. You can put the file f90doc in -a more accessible place, but the .pl files have to be in the same directory. -Alternatively, you can create a symlink to the real f90doc, where the .pl -files are held. For example, - - ln -s /usr/local/lib/f90doc-0.3.4/f90doc /usr/local/bin/f90doc - -If you don't have a command /usr/bin/env, you'll need to replace the first line -of f90doc with - - #!/path/to/perl5/bin/perl -w - -Otherwise, Perl version 5.003 or higher must be the first program called "perl" -in your path. diff --git a/Tools/F_scripts/f90doc/expr_parse.pl b/Tools/F_scripts/f90doc/expr_parse.pl deleted file mode 100644 index 3e831337041..00000000000 --- a/Tools/F_scripts/f90doc/expr_parse.pl +++ /dev/null @@ -1,793 +0,0 @@ -$yysccsid = "@(#)yaccpar 1.8 (Berkeley) 01/20/91 (Perl 2.0 12/31/92)"; -#define YYBYACC 1 -#line 2 "expr_parse.y" -package expr_parse; - -;# On failure, print out this as the line we were working on. 
-$expr_parse::line = ""; - -;# Portion of line left to parse -$expr_parse::left = ""; -#line 12 "y.tab.pl" -$COMMA=257; -$LPAREN=258; -$RPAREN=259; -$NOT=260; -$OR=261; -$AND=262; -$EQV=263; -$NEQV=264; -$COMPARISON=265; -$DBLSLASH=266; -$PERCENT=267; -$PLUS=268; -$MINUS=269; -$UPLUS=270; -$UMINUS=271; -$ASTERIK=272; -$SLASH=273; -$DBLASTERIK=274; -$CONST=275; -$NAME=276; -$COLON=277; -$LARRAY=278; -$RARRAY=279; -$EQUALS=280; -$YYERRCODE=256; -@yylhs = ( -1, - 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 5, 5, 5, 5, 5, 4, 4, 7, 6, - 6, 3, 3, 3, 8, 8, 9, 9, 10, 10, - 10, 12, 11, 11, 11, 11, -); -@yylen = ( 2, - 1, 2, 1, 1, 1, 3, 2, 2, 2, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 1, 3, 1, 3, 3, 3, 1, 1, 5, - 7, 1, 3, 4, 0, 1, 3, 1, 1, 1, - 1, 3, 1, 2, 2, 3, -); -@yydefred = ( 0, - 0, 0, 0, 0, 3, 32, 0, 0, 0, 4, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 28, 2, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 10, 0, 6, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 38, 40, 41, 33, - 23, 0, 26, 25, 27, 0, 0, 0, 34, 0, - 0, 0, 0, 37, 0, 0, 0, 0, 0, -); -@yydgoto = ( 8, - 19, 10, 11, 20, 15, 63, 21, 55, 56, 57, - 58, 59, -); -@yysindex = ( -212, - -157, -212, -212, -212, 0, 0, -212, 0, -137, 0, - -246, -241, -29, -234, -235, -19, -223, -223, -29, -257, - 0, 0, -212, -212, -212, -212, -212, -212, -212, -212, - -212, -212, -212, -216, -229, -267, -222, 0, -212, 0, - -255, -19, 227, 227, 236, -164, -223, -223, -233, -233, - -233, -205, -212, -76, -174, -162, 0, 0, 0, 0, - 0, -180, 0, 0, 0, -212, -29, -212, 0, -216, - -212, -29, -29, 0, -118, -212, -95, -212, -29, -); -@yyrindex = ( 0, - 0, 0, 0, 0, 0, 0, 0, 0, 106, 0, - 1, -59, 0, -43, 0, 163, 77, 96, -242, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, -152, 0, 0, 0, 0, 0, 0, - 191, 172, 199, 208, 182, 153, 115, 134, 20, 39, - 58, -175, -219, -214, 0, -146, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, -192, -188, 0, 0, - 0, -183, -178, 0, 0, 0, -145, 0, -143, -); -@yygindex = ( 0, - 2, 116, 0, 0, 0, 85, 84, 0, 0, 60, - 0, 0, -); -$YYTABLESIZE=510; -@yytable = ( 39, - 5, 9, 13, 16, 17, 18, 24, 61, 62, 27, - 28, 34, 29, 30, 29, 36, 31, 32, 33, 12, - 35, 40, 37, 38, 41, 42, 43, 44, 45, 46, - 47, 48, 49, 50, 51, 54, 29, 43, 13, 43, - 33, 1, 39, 2, 39, 1, 60, 2, 31, 32, - 33, 3, 4, 62, 67, 3, 4, 11, 5, 52, - 53, 7, 5, 6, 45, 7, 45, 72, 44, 73, - 44, 54, 75, 42, 66, 42, 7, 77, 46, 79, - 46, 32, 32, 32, 69, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 70, 8, 32, 32, 32, 71, - 1, 32, 2, 29, 30, 1, 35, 31, 32, 33, - 3, 4, 36, 30, 14, 31, 14, 12, 6, 22, - 7, 64, 65, 23, 24, 25, 26, 27, 28, 74, - 29, 30, 0, 15, 31, 32, 33, 0, 76, 0, - 0, 0, 23, 24, 25, 26, 27, 28, 0, 29, - 30, 0, 16, 31, 32, 33, 0, 0, 0, 0, - 0, 78, 9, 0, 0, 23, 24, 25, 26, 27, - 28, 18, 29, 30, 0, 0, 31, 32, 33, 0, - 0, 17, 0, 0, 23, 24, 25, 26, 27, 28, - 19, 29, 30, 0, 0, 31, 32, 33, 20, 22, - 68, 3, 3, 3, 3, 3, 3, 21, 3, 3, - 0, 0, 3, 3, 3, 24, 0, 4, 4, 4, - 4, 4, 4, 0, 4, 4, 0, 0, 4, 4, - 4, 23, 24, 25, 26, 27, 28, 0, 29, 30, - 0, 0, 31, 32, 33, 27, 28, 0, 29, 30, - 0, 0, 31, 32, 33, 0, 0, 5, 0, 5, - 0, 5, 5, 5, 5, 5, 5, 0, 5, 5, - 0, 0, 5, 5, 5, 0, 12, 5, 12, 5, - 12, 12, 12, 12, 12, 12, 0, 12, 12, 0, - 0, 12, 12, 0, 0, 13, 12, 13, 12, 13, - 13, 13, 13, 13, 13, 0, 13, 13, 0, 0, - 13, 13, 0, 0, 11, 13, 11, 13, 11, 11, - 11, 11, 11, 11, 0, 11, 11, 0, 0, 11, - 11, 0, 0, 7, 11, 7, 11, 7, 7, 7, - 7, 7, 7, 0, 7, 7, 0, 0, 0, 0, - 0, 0, 8, 7, 8, 7, 8, 8, 8, 8, - 8, 8, 0, 8, 8, 0, 0, 0, 0, 0, - 0, 14, 8, 14, 8, 14, 14, 14, 14, 14, - 14, 0, 14, 14, 0, 0, 0, 0, 0, 0, - 15, 14, 15, 14, 15, 15, 15, 15, 15, 
15, - 0, 15, 15, 0, 0, 0, 0, 0, 0, 16, - 15, 16, 15, 16, 16, 16, 16, 16, 16, 9, - 0, 9, 0, 9, 9, 9, 9, 0, 18, 16, - 18, 16, 18, 18, 18, 18, 0, 0, 17, 9, - 17, 9, 17, 17, 17, 17, 0, 19, 18, 19, - 18, 19, 0, 19, 19, 20, 0, 20, 17, 0, - 17, 20, 20, 0, 21, 0, 21, 19, 0, 19, - 21, 21, 0, 0, 0, 20, 0, 20, 0, 0, - 0, 0, 0, 0, 21, 0, 21, 23, 24, 0, - 0, 27, 28, 0, 29, 30, 0, 0, 31, 32, - 33, 28, 0, 29, 30, 0, 0, 31, 32, 33, -); -@yycheck = ( 257, - 0, 0, 1, 2, 3, 4, 262, 275, 276, 265, - 266, 258, 268, 269, 257, 257, 272, 273, 274, 0, - 267, 279, 257, 259, 23, 24, 25, 26, 27, 28, - 29, 30, 31, 32, 33, 34, 279, 257, 0, 259, - 274, 258, 257, 260, 259, 258, 276, 260, 272, 273, - 274, 268, 269, 276, 53, 268, 269, 0, 275, 276, - 277, 278, 275, 276, 257, 278, 259, 66, 257, 68, - 259, 70, 71, 257, 280, 259, 0, 76, 257, 78, - 259, 257, 258, 259, 259, 261, 262, 263, 264, 265, - 266, 267, 268, 269, 257, 0, 272, 273, 274, 280, - 258, 277, 260, 268, 269, 0, 259, 272, 273, 274, - 268, 269, 259, 259, 0, 259, 1, 275, 276, 257, - 278, 37, 39, 261, 262, 263, 264, 265, 266, 70, - 268, 269, -1, 0, 272, 273, 274, -1, 257, -1, - -1, -1, 261, 262, 263, 264, 265, 266, -1, 268, - 269, -1, 0, 272, 273, 274, -1, -1, -1, -1, - -1, 257, 0, -1, -1, 261, 262, 263, 264, 265, - 266, 0, 268, 269, -1, -1, 272, 273, 274, -1, - -1, 0, -1, -1, 261, 262, 263, 264, 265, 266, - 0, 268, 269, -1, -1, 272, 273, 274, 0, 259, - 277, 261, 262, 263, 264, 265, 266, 0, 268, 269, - -1, -1, 272, 273, 274, 259, -1, 261, 262, 263, - 264, 265, 266, -1, 268, 269, -1, -1, 272, 273, - 274, 261, 262, 263, 264, 265, 266, -1, 268, 269, - -1, -1, 272, 273, 274, 265, 266, -1, 268, 269, - -1, -1, 272, 273, 274, -1, -1, 257, -1, 259, - -1, 261, 262, 263, 264, 265, 266, -1, 268, 269, - -1, -1, 272, 273, 274, -1, 257, 277, 259, 279, - 261, 262, 263, 264, 265, 266, -1, 268, 269, -1, - -1, 272, 273, -1, -1, 257, 277, 259, 279, 261, - 262, 263, 264, 265, 266, -1, 268, 269, -1, -1, - 272, 273, -1, -1, 257, 277, 259, 279, 261, 262, - 263, 264, 265, 266, -1, 268, 269, -1, -1, 272, - 273, -1, -1, 257, 277, 259, 279, 261, 262, 263, - 264, 265, 266, -1, 268, 269, -1, -1, -1, -1, - -1, -1, 257, 277, 259, 279, 261, 262, 263, 264, - 265, 266, -1, 268, 269, -1, -1, -1, -1, -1, - -1, 257, 277, 259, 279, 261, 262, 263, 264, 265, - 266, -1, 268, 269, -1, -1, -1, -1, -1, -1, - 257, 277, 259, 279, 261, 262, 263, 264, 265, 266, - -1, 268, 269, -1, -1, -1, -1, -1, -1, 257, - 277, 259, 279, 261, 262, 263, 264, 265, 266, 257, - -1, 259, -1, 261, 262, 263, 264, -1, 257, 277, - 259, 279, 261, 262, 263, 264, -1, -1, 257, 277, - 259, 279, 261, 262, 263, 264, -1, 257, 277, 259, - 279, 261, -1, 263, 264, 257, -1, 259, 277, -1, - 279, 263, 264, -1, 257, -1, 259, 277, -1, 279, - 263, 264, -1, -1, -1, 277, -1, 279, -1, -1, - -1, -1, -1, -1, 277, -1, 279, 261, 262, -1, - -1, 265, 266, -1, 268, 269, -1, -1, 272, 273, - 274, 266, -1, 268, 269, -1, -1, 272, 273, 274, -); -$YYFINAL=8; -#ifndef YYDEBUG -#define YYDEBUG 0 -#endif -$YYMAXTOKEN=280; -#if YYDEBUG -@yyname = ( -"end-of-file",'','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','', -'','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','', -'','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','', -'','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','', 
-'','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','', -'','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','', -'','','','','','','','','','','','','','','','','','','','','','','',"COMMA","LPAREN","RPAREN","NOT", -"OR","AND","EQV","NEQV","COMPARISON","DBLSLASH","PERCENT","PLUS","MINUS", -"UPLUS","UMINUS","ASTERIK","SLASH","DBLASTERIK","CONST","NAME","COLON","LARRAY", -"RARRAY","EQUALS", -); -@yyrule = ( -"\$accept : expr_with_abort", -"expr_with_abort : expr", -"expr_with_abort : expr COMMA", -"expr : CONST", -"expr : expr_without_const", -"expr_without_const : chain", -"expr_without_const : LARRAY array RARRAY", -"expr_without_const : PLUS expr", -"expr_without_const : MINUS expr", -"expr_without_const : NOT expr", -"expr_without_const : LPAREN potential_complex_or_implied_do RPAREN", -"expr_without_const : expr DBLASTERIK expr", -"expr_without_const : expr ASTERIK expr", -"expr_without_const : expr SLASH expr", -"expr_without_const : expr PLUS expr", -"expr_without_const : expr MINUS expr", -"expr_without_const : expr DBLSLASH expr", -"expr_without_const : expr COMPARISON expr", -"expr_without_const : expr AND expr", -"expr_without_const : expr OR expr", -"expr_without_const : expr EQV expr", -"expr_without_const : expr NEQV expr", -"potential_complex_or_implied_do : CONST", -"potential_complex_or_implied_do : CONST COMMA CONST", -"potential_complex_or_implied_do : expr_without_const", -"potential_complex_or_implied_do : expr_without_const COMMA do_args", -"potential_complex_or_implied_do : CONST COMMA do_args", -"array : array COMMA array_piece", -"array : array_piece", -"array_piece : expr", -"do_args : NAME EQUALS expr COMMA expr", -"do_args : NAME EQUALS expr COMMA expr COMMA expr", -"chain : NAME", -"chain : chain PERCENT NAME", -"chain : chain LPAREN exprlist RPAREN", -"exprlist :", -"exprlist : exprlist_ne", -"exprlist_ne : exprlist_ne COMMA argument", -"exprlist_ne : argument", -"argument : expr", -"argument : colonexpr", -"argument : namedargument", -"namedargument : NAME EQUALS expr", -"colonexpr : COLON", -"colonexpr : expr COLON", -"colonexpr : COLON expr", -"colonexpr : expr COLON expr", -); -#endif -sub yyclearin { $yychar = -1; } -sub yyerrok { $yyerrflag = 0; } -$YYSTACKSIZE = $YYSTACKSIZE || $YYMAXDEPTH || 500; -$YYMAXDEPTH = $YYMAXDEPTH || $YYSTACKSIZE || 500; -$yyss[$YYSTACKSIZE] = 0; -$yyvs[$YYSTACKSIZE] = 0; -sub YYERROR { ++$yynerrs; &yy_err_recover; } -sub yy_err_recover -{ - if ($yyerrflag < 3) - { - $yyerrflag = 3; - while (1) - { - if (($yyn = $yysindex[$yyss[$yyssp]]) && - ($yyn += $YYERRCODE) >= 0 && - $yycheck[$yyn] == $YYERRCODE) - { -#if YYDEBUG - print "yydebug: state $yyss[$yyssp], error recovery shifting", - " to state $yytable[$yyn]\n" if $yydebug; -#endif - $yyss[++$yyssp] = $yystate = $yytable[$yyn]; - $yyvs[++$yyvsp] = $yylval; - next yyloop; - } - else - { -#if YYDEBUG - print "yydebug: error recovery discarding state ", - $yyss[$yyssp], "\n" if $yydebug; -#endif - return(1) if $yyssp <= 0; - --$yyssp; - --$yyvsp; - } - } - } - else - { - return (1) if $yychar == 0; -#if YYDEBUG - if ($yydebug) - { - $yys = ''; - if ($yychar <= $YYMAXTOKEN) { $yys = $yyname[$yychar]; } - if (!$yys) { $yys = 'illegal-symbol'; } - print "yydebug: state $yystate, error recovery discards ", - "token $yychar ($yys)\n"; - } -#endif - $yychar = -1; - next yyloop; - } -0; -} # yy_err_recover - -sub yyparse -{ -#ifdef YYDEBUG - if ($yys = 
$ENV{'YYDEBUG'}) - { - $yydebug = int($1) if $yys =~ /^(\d)/; - } -#endif - - $yynerrs = 0; - $yyerrflag = 0; - $yychar = (-1); - - $yyssp = 0; - $yyvsp = 0; - $yyss[$yyssp] = $yystate = 0; - -yyloop: while(1) - { - yyreduce: { - last yyreduce if ($yyn = $yydefred[$yystate]); - if ($yychar < 0) - { - if (($yychar = &yylex) < 0) { $yychar = 0; } -#if YYDEBUG - if ($yydebug) - { - $yys = ''; - if ($yychar <= $#yyname) { $yys = $yyname[$yychar]; } - if (!$yys) { $yys = 'illegal-symbol'; }; - print "yydebug: state $yystate, reading $yychar ($yys)\n"; - } -#endif - } - if (($yyn = $yysindex[$yystate]) && ($yyn += $yychar) >= 0 && - $yycheck[$yyn] == $yychar) - { -#if YYDEBUG - print "yydebug: state $yystate, shifting to state ", - $yytable[$yyn], "\n" if $yydebug; -#endif - $yyss[++$yyssp] = $yystate = $yytable[$yyn]; - $yyvs[++$yyvsp] = $yylval; - $yychar = (-1); - --$yyerrflag if $yyerrflag > 0; - next yyloop; - } - if (($yyn = $yyrindex[$yystate]) && ($yyn += $yychar) >= 0 && - $yycheck[$yyn] == $yychar) - { - $yyn = $yytable[$yyn]; - last yyreduce; - } - if (! $yyerrflag) { - &yyerror('syntax error'); - ++$yynerrs; - } - return(1) if &yy_err_recover; - } # yyreduce -#if YYDEBUG - print "yydebug: state $yystate, reducing by rule ", - "$yyn ($yyrule[$yyn])\n" if $yydebug; -#endif - $yym = $yylen[$yyn]; - $yyval = $yyvs[$yyvsp+1-$yym]; - switch: - { -if ($yyn == 1) { -#line 29 "expr_parse.y" -{ $yyval = $yyvs[$yyvsp-0]; return 1; -last switch; -} } -if ($yyn == 2) { -#line 30 "expr_parse.y" -{ $yyval = $yyvs[$yyvsp-1]; return "s,"; -last switch; -} } -if ($yyn == 3) { -#line 33 "expr_parse.y" -{ $yyval = [ "%const", @{$yyvs[$yyvsp-0]} ]; -last switch; -} } -if ($yyn == 4) { -#line 34 "expr_parse.y" -{ $yyval = $yyvs[$yyvsp-0]; -last switch; -} } -if ($yyn == 5) { -#line 37 "expr_parse.y" -{ $yyval = $yyvs[$yyvsp-0]; -last switch; -} } -if ($yyn == 6) { -#line 38 "expr_parse.y" -{ $yyval = [ "%array", @{$yyvs[$yyvsp-1]} ]; -last switch; -} } -if ($yyn == 7) { -#line 39 "expr_parse.y" -{ $yyval = [ "u+", $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 8) { -#line 40 "expr_parse.y" -{ $yyval = [ "u-", $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 9) { -#line 41 "expr_parse.y" -{ $yyval = [ $yyvs[$yyvsp-1], $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 10) { -#line 43 "expr_parse.y" -{ $yyval = $yyvs[$yyvsp-1]; -last switch; -} } -if ($yyn == 11) { -#line 44 "expr_parse.y" -{ $yyval = [ $yyvs[$yyvsp-1], $yyvs[$yyvsp-2], $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 12) { -#line 45 "expr_parse.y" -{ $yyval = [ $yyvs[$yyvsp-1], $yyvs[$yyvsp-2], $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 13) { -#line 46 "expr_parse.y" -{ $yyval = [ $yyvs[$yyvsp-1], $yyvs[$yyvsp-2], $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 14) { -#line 47 "expr_parse.y" -{ $yyval = [ $yyvs[$yyvsp-1], $yyvs[$yyvsp-2], $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 15) { -#line 48 "expr_parse.y" -{ $yyval = [ $yyvs[$yyvsp-1], $yyvs[$yyvsp-2], $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 16) { -#line 49 "expr_parse.y" -{ $yyval = [ $yyvs[$yyvsp-1], $yyvs[$yyvsp-2], $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 17) { -#line 50 "expr_parse.y" -{ $yyval = [ $yyvs[$yyvsp-1], $yyvs[$yyvsp-2], $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 18) { -#line 51 "expr_parse.y" -{ $yyval = [ $yyvs[$yyvsp-1], $yyvs[$yyvsp-2], $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 19) { -#line 52 "expr_parse.y" -{ $yyval = [ $yyvs[$yyvsp-1], $yyvs[$yyvsp-2], $yyvs[$yyvsp-0] ]; -last switch; -} } -if 
($yyn == 20) { -#line 53 "expr_parse.y" -{ $yyval = [ $yyvs[$yyvsp-1], $yyvs[$yyvsp-2], $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 21) { -#line 54 "expr_parse.y" -{ $yyval = [ $yyvs[$yyvsp-1], $yyvs[$yyvsp-2], $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 22) { -#line 57 "expr_parse.y" -{ $yyval = [ "%const", @{$yyvs[$yyvsp-0]} ]; -last switch; -} } -if ($yyn == 23) { -#line 59 "expr_parse.y" -{ my ($type1, $val1) = @{$yyvs[$yyvsp-2]}; - my ($type2, $val2) = @{$yyvs[$yyvsp-0]}; - $yyval = ["%const", typing::make_complex_type ($type1, $type2), - [$val1, $val2]]; - -last switch; -} } -if ($yyn == 24) { -#line 64 "expr_parse.y" -{ $yyval = $yyvs[$yyvsp-0]; -last switch; -} } -if ($yyn == 25) { -#line 66 "expr_parse.y" -{ $yyval = [ "%do", $yyvs[$yyvsp-2], @{$yyvs[$yyvsp-0]} ]; -last switch; -} } -if ($yyn == 26) { -#line 68 "expr_parse.y" -{ $yyval = [ "%do", [ "%const", @{$yyvs[$yyvsp-2]} ], @{$yyvs[$yyvsp-0]} ]; - -last switch; -} } -if ($yyn == 27) { -#line 72 "expr_parse.y" -{ $yyval = [ @{$yyvs[$yyvsp-2]}, $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 28) { -#line 73 "expr_parse.y" -{ $yyval = [ $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 29) { -#line 76 "expr_parse.y" -{ $yyval = $yyvs[$yyvsp-0]; -last switch; -} } -if ($yyn == 30) { -#line 80 "expr_parse.y" -{ $yyval = [ $yyvs[$yyvsp-4], $yyvs[$yyvsp-2], $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 31) { -#line 82 "expr_parse.y" -{ $yyval = [ $yyvs[$yyvsp-6], $yyvs[$yyvsp-4], $yyvs[$yyvsp-2], $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 32) { -#line 85 "expr_parse.y" -{ $yyval = [ "%var", $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 33) { -#line 86 "expr_parse.y" -{ $yyval = [ $yyvs[$yyvsp-1], $yyvs[$yyvsp-2], $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 34) { -#line 87 "expr_parse.y" -{ $yyval = [ "%call", $yyvs[$yyvsp-3], @{$yyvs[$yyvsp-1]} ]; -last switch; -} } -if ($yyn == 35) { -#line 90 "expr_parse.y" -{ $yyval = []; -last switch; -} } -if ($yyn == 36) { -#line 91 "expr_parse.y" -{ $yyval = $yyvs[$yyvsp-0]; -last switch; -} } -if ($yyn == 37) { -#line 94 "expr_parse.y" -{ $yyval = [ @{$yyvs[$yyvsp-2]}, $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 38) { -#line 95 "expr_parse.y" -{ $yyval = [ $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 39) { -#line 98 "expr_parse.y" -{ $yyval = $yyvs[$yyvsp-0]; -last switch; -} } -if ($yyn == 40) { -#line 99 "expr_parse.y" -{ $yyval = $yyvs[$yyvsp-0]; -last switch; -} } -if ($yyn == 41) { -#line 100 "expr_parse.y" -{ $yyval = $yyvs[$yyvsp-0]; -last switch; -} } -if ($yyn == 42) { -#line 103 "expr_parse.y" -{ $yyval = [ "%namedarg", $yyvs[$yyvsp-2], $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 43) { -#line 106 "expr_parse.y" -{ $yyval = [ "%colon", "", "" ]; -last switch; -} } -if ($yyn == 44) { -#line 107 "expr_parse.y" -{ $yyval = [ "%colon", $yyvs[$yyvsp-1], "" ]; -last switch; -} } -if ($yyn == 45) { -#line 108 "expr_parse.y" -{ $yyval = [ "%colon", "", $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 46) { -#line 109 "expr_parse.y" -{ $yyval = [ "%colon", $yyvs[$yyvsp-2], $yyvs[$yyvsp-1] ]; -last switch; -} } -#line 624 "y.tab.pl" - } # switch - $yyssp -= $yym; - $yystate = $yyss[$yyssp]; - $yyvsp -= $yym; - $yym = $yylhs[$yyn]; - if ($yystate == 0 && $yym == 0) - { -#if YYDEBUG - print "yydebug: after reduction, shifting from state 0 ", - "to state $YYFINAL\n" if $yydebug; -#endif - $yystate = $YYFINAL; - $yyss[++$yyssp] = $YYFINAL; - $yyvs[++$yyvsp] = $yyval; - if ($yychar < 0) - { - if (($yychar = &yylex) < 0) { $yychar = 
0; } -#if YYDEBUG - if ($yydebug) - { - $yys = ''; - if ($yychar <= $#yyname) { $yys = $yyname[$yychar]; } - if (!$yys) { $yys = 'illegal-symbol'; } - print "yydebug: state $YYFINAL, reading $yychar ($yys)\n"; - } -#endif - } - return(0) if $yychar == 0; - next yyloop; - } - if (($yyn = $yygindex[$yym]) && ($yyn += $yystate) >= 0 && - $yyn <= $#yycheck && $yycheck[$yyn] == $yystate) - { - $yystate = $yytable[$yyn]; - } else { - $yystate = $yydgoto[$yym]; - } -#if YYDEBUG - print "yydebug: after reduction, shifting from state ", - "$yyss[$yyssp] to state $yystate\n" if $yydebug; -#endif - $yyss[++$yyssp] = $yystate; - $yyvs[++$yyvsp] = $yyval; - } # yyloop -} # yyparse -#line 112 "expr_parse.y" - -sub yylex { - $expr_parse::left =~ s/^\s*//; - return 0 if $expr_parse::left eq ""; - my ($ncharsread, $token, $value) = expr_parse::good_yylex ($expr_parse::left); - # print "yylex: token eof\n" unless $ncharsread; - return 0 unless $ncharsread; - # print "yylex: token $token (" . substr ($expr_parse::left, 0, $ncharsread) . ") with value $value\n"; - # print join (";", @$value) . "\n"; - $expr_parse::left = substr ($expr_parse::left, $ncharsread); - $yylval = $value; - return $token; -} - -# returns (ncharsread, token, value) -sub good_yylex { - my ($s) = @_; - my ($c) = substr ($s, 0, 1); - - if ($c eq "") { - return 0; - } elsif ($s =~ /^(\d+(?:\.\d*)?|\.\d+)D[+-]?\d+/i) { - return (length ($&), $CONST, [$typing::double_precision, $&]); - } elsif ($s =~ /^(\d+E[+-]?\d+|(?:\d+\.\d*|\.\d+)(?:E[+-]?\d+)?)(_\w+)?/i) { - if (defined $2) { - return (length ($&), $CONST, [typing::make_type ('real', substr ($2, 1)), $1]); - } else { - return (length ($&), $CONST, [$typing::default_type{'real'}, $1]); - } - } elsif ($s =~ /^(\d+)(_\w+)?/) { - if ($2) { - return (length ($&), $CONST, [typing::make_type ('integer', substr ($2, 1)), $1]); - } else { - return (length ($&), $CONST, [$typing::default_type{'integer'}, $1]); - } - } elsif ($s =~ /^(\.true\.|\.false\.)(_\w+)?/i) { - if (defined $2) { - return (length ($&), $CONST, [typing::make_type ('logical', substr ($2, 1)), $1]); - } else { - return (length ($&), $CONST, [$typing::default_type{'logical'}, $1]); - } - } elsif ($s =~ /^'(\d+)'(_\w+)?/) { - # Interior of string is digits because it has been grabbed already. 
-    my ($str) = stmts::get_string ($1);
-    if (defined $2) {
-      return (length ($&), $CONST, [typing::make_character_type (substr ($2, 1), length ($str)), $str]);
-    } else {
-      return (length ($&), $CONST, [typing::make_character_type ($typing::default_character_kind, length ($str)), $str]);
-    }
-  } elsif ($s =~ /^\w+/) {
-    return (length ($&), $NAME, $&);
-  } else {
-    switch: {
-      $s =~ /^==/ && return (2, $COMPARISON, "==");
-      $s =~ /^<=/ && return (2, $COMPARISON, "<=");
-      $s =~ /^>=/ && return (2, $COMPARISON, ">=");
-      $s =~ /^</ && return (1, $COMPARISON, "<");
-      $s =~ /^>/ && return (1, $COMPARISON, ">");
-      $s =~ /^\/=/ && return (2, $COMPARISON, "/=");
-      $s =~ /^=/ && return (1, $EQUALS, "=");
-      $s =~ /^\.eq\./i && return (4, $COMPARISON, "==");
-      $s =~ /^\.le\./i && return (4, $COMPARISON, "<=");
-      $s =~ /^\.ge\./i && return (4, $COMPARISON, ">=");
-      $s =~ /^\.lt\./i && return (4, $COMPARISON, "<");
-      $s =~ /^\.gt\./i && return (4, $COMPARISON, ">");
-      $s =~ /^\.ne\./i && return (4, $COMPARISON, "/=");
-      $s =~ /^\.neqv\./i && return (6, $NEQV, ".neqv.");
-      $s =~ /^\.eqv\./i && return (5, $EQV, ".eqv.");
-      $s =~ /^\.and\./i && return (5, $AND, ".and.");
-      $s =~ /^\.or\./i && return (4, $OR, ".or.");
-      $s =~ /^\.not\./i && return (5, $NOT, ".not.");
-      $s =~ /^\*\*/ && return (2, $DBLASTERIK, "**");
-      $s =~ /^\/\// && return (2, $DBLSLASH, "//");
-      $s =~ /^\(\// && return (2, $LARRAY, "(/");
-      $s =~ /^\/\)/ && return (2, $RARRAY, "/)");
-      $c eq "," && return (1, $COMMA, ",");
-      $c eq "+" && return (1, $PLUS, "+");
-      $c eq "-" && return (1, $MINUS, "-");
-      $c eq "*" && return (1, $ASTERIK, "*");
-      $c eq "/" && return (1, $SLASH, "/");
-      $c eq "(" && return (1, $LPAREN, "(");
-      $c eq ")" && return (1, $RPAREN, ")");
-      $c eq "%" && return (1, $PERCENT, "%");
-      $c eq ":" && return (1, $COLON, ":");
-    }
-    die "Lexer failed on `$s'";
-  }
-}
-
-#####
-# Takes a string that consists entirely of an expression, and returns a
-# reference to the parse tree it defines.
-#####
-sub parse_expr {
-  my ($s) = @_;
-  # print "parsing string: $s.\n";
-  $expr_parse::left = $expr_parse::line = $s;
-  die "Expression `$expr_parse::line' has trailing garbage `$1$expr_parse::left'"
-    if yyparse () =~ /^s(.*)$/;
-  return $yyval;
-}
-
-#####
-# Takes a string that consists partly of an expression.  (The first part
-# is an expression.)  Returns (parse tree ref, rest string, separator string).
-#####
-sub parse_part_as_expr {
-  my ($s) = @_;
-  # print "parsing part of string: $s.\n";
-  $expr_parse::left = $expr_parse::line = $s;
-  if (yyparse () =~ /^s(.*)$/) {
-    return ($yyval, $expr_parse::left, $1);
-  } else {
-    return ($yyval);
-  }
-}
-
-sub yyerror {
-  my ($s) = @_;
-  die "yyerror: $s during parsing of F90 code `$expr_parse::line'";
-}
-
-1;
-#line 794 "y.tab.pl"
diff --git a/Tools/F_scripts/f90doc/expr_parse.y b/Tools/F_scripts/f90doc/expr_parse.y
deleted file mode 100644
index 94070cfc768..00000000000
--- a/Tools/F_scripts/f90doc/expr_parse.y
+++ /dev/null
@@ -1,234 +0,0 @@
-%{
-package expr_parse;
-
-# On failure, print out this as the line we were working on.
-$expr_parse::line = ""; - -# Portion of line left to parse -$expr_parse::left = ""; -%} - -%token COMMA LPAREN RPAREN NOT OR AND EQV NEQV COMPARISON DBLSLASH PERCENT -%token PLUS MINUS UPLUS UMINUS ASTERIK SLASH DBLASTERIK CONST NAME COLON -%token LARRAY RARRAY EQUALS - -%left EQV NEQV -%left OR -%left AND -%nonassoc NOT -%nonassoc COMPARISON -%left DBLSLASH -%left PLUS MINUS -%nonassoc UPLUS UMINUS -%left ASTERIK SLASH -%right DBLASTERIK -%left PERCENT - -%% - -expr_with_abort: expr { $$ = $1; return 1; } - | expr COMMA { $$ = $1; return "s,"; } - -expr: - CONST { $$ = [ "%const", @{$1} ]; } - | expr_without_const { $$ = $1; } - -expr_without_const: - chain { $$ = $1; } - | LARRAY array RARRAY { $$ = [ "%array", @{$2} ]; } - | PLUS expr %prec UPLUS { $$ = [ "u+", $2 ]; } - | MINUS expr %prec UMINUS { $$ = [ "u-", $2 ]; } - | NOT expr { $$ = [ $1, $2 ]; } - | LPAREN potential_complex_or_implied_do RPAREN - { $$ = $2; } - | expr DBLASTERIK expr { $$ = [ $2, $1, $3 ]; } - | expr ASTERIK expr { $$ = [ $2, $1, $3 ]; } - | expr SLASH expr { $$ = [ $2, $1, $3 ]; } - | expr PLUS expr { $$ = [ $2, $1, $3 ]; } - | expr MINUS expr { $$ = [ $2, $1, $3 ]; } - | expr DBLSLASH expr { $$ = [ $2, $1, $3 ]; } - | expr COMPARISON expr { $$ = [ $2, $1, $3 ]; } - | expr AND expr { $$ = [ $2, $1, $3 ]; } - | expr OR expr { $$ = [ $2, $1, $3 ]; } - | expr EQV expr { $$ = [ $2, $1, $3 ]; } - | expr NEQV expr { $$ = [ $2, $1, $3 ]; } - -potential_complex_or_implied_do: - CONST { $$ = [ "%const", @{$1} ]; } - | CONST COMMA CONST - { my ($type1, $val1) = @{$1}; - my ($type2, $val2) = @{$3}; - $$ = ["%const", typing::make_complex_type ($type1, $type2), - [$val1, $val2]]; - } - | expr_without_const { $$ = $1; } - | expr_without_const COMMA do_args - { $$ = [ "%do", $1, @{$3} ]; } - | CONST COMMA do_args - { $$ = [ "%do", [ "%const", @{$1} ], @{$3} ]; - } - -array: - array COMMA array_piece { $$ = [ @{$1}, $3 ]; } - | array_piece { $$ = [ $1 ]; } - -array_piece: - expr { $$ = $1; } -# | implied_do is handled within expr - -do_args: - NAME EQUALS expr COMMA expr { $$ = [ $1, $3, $5 ]; } - | NAME EQUALS expr COMMA expr COMMA expr - { $$ = [ $1, $3, $5, $7 ]; } - -chain: - NAME { $$ = [ "%var", $1 ]; } - | chain PERCENT NAME { $$ = [ $2, $1, $3 ]; } - | chain LPAREN exprlist RPAREN { $$ = [ "%call", $1, @{$3} ]; } - -exprlist: - { $$ = []; } - | exprlist_ne { $$ = $1; } - -exprlist_ne: - exprlist_ne COMMA argument { $$ = [ @{$1}, $3 ]; } - | argument { $$ = [ $1 ]; } - -argument: - expr { $$ = $1; } - | colonexpr { $$ = $1; } - | namedargument { $$ = $1; } - -namedargument: - NAME EQUALS expr { $$ = [ "%namedarg", $1, $3 ]; } - -colonexpr: - COLON { $$ = [ "%colon", "", "" ]; } - | expr COLON { $$ = [ "%colon", $1, "" ]; } - | COLON expr { $$ = [ "%colon", "", $2 ]; } - | expr COLON expr { $$ = [ "%colon", $1, $2 ]; } - -%% - -sub yylex { - $expr_parse::left =~ s/^\s*//; - return 0 if $expr_parse::left eq ""; - my ($ncharsread, $token, $value) = expr_parse::good_yylex ($expr_parse::left); - # print "yylex: token eof\n" unless $ncharsread; - return 0 unless $ncharsread; - # print "yylex: token $token (" . substr ($expr_parse::left, 0, $ncharsread) . ") with value $value\n"; - # print join (";", @$value) . 
"\n"; - $expr_parse::left = substr ($expr_parse::left, $ncharsread); - $yylval = $value; - return $token; -} - -# returns (ncharsread, token, value) -sub good_yylex { - my ($s) = @_; - my ($c) = substr ($s, 0, 1); - - if ($c eq "") { - return 0; - } elsif ($s =~ /^(\d+(?:\.\d*)?|\.\d+)D[+-]?\d+/i) { - return (length ($&), $CONST, [$typing::double_precision, $&]); - } elsif ($s =~ /^(\d+E[+-]?\d+|(?:\d+\.\d*|\.\d+)(?:E[+-]?\d+)?)(_\w+)?/i) { - if (defined $2) { - return (length ($&), $CONST, [typing::make_type ('real', substr ($2, 1)), $1]); - } else { - return (length ($&), $CONST, [$typing::default_type{'real'}, $1]); - } - } elsif ($s =~ /^(\d+)(_\w+)?/) { - if ($2) { - return (length ($&), $CONST, [typing::make_type ('integer', substr ($2, 1)), $1]); - } else { - return (length ($&), $CONST, [$typing::default_type{'integer'}, $1]); - } - } elsif ($s =~ /^(\.true\.|\.false\.)(_\w+)?/i) { - if (defined $2) { - return (length ($&), $CONST, [typing::make_type ('logical', substr ($2, 1)), $1]); - } else { - return (length ($&), $CONST, [$typing::default_type{'logical'}, $1]); - } - } elsif ($s =~ /^'(\d+)'(_\w+)?/) { - # Interior of string is digits because it has been grabbed already. - my ($str) = stmts::get_string ($1); - if (defined $2) { - return (length ($&), $CONST, [typing::make_character_type (substr ($2, 1), length ($str)), $str]); - } else { - return (length ($&), $CONST, [typing::make_character_type ($typing::default_character_kind, length ($str)), $str]); - } - } elsif ($s =~ /^\w+/) { - return (length ($&), $NAME, $&); - } else { - switch: { - $s =~ /^==/ && return (2, $COMPARISON, "=="); - $s =~ /^<=/ && return (2, $COMPARISON, "<="); - $s =~ /^>=/ && return (2, $COMPARISON, ">="); - $s =~ /^/ && return (1, $COMPARISON, ">"); - $s =~ /^\/=/ && return (2, $COMPARISON, "/="); - $s =~ /^=/ && return (1, $EQUALS, "="); - $s =~ /^\.eq\./i && return (4, $COMPARISON, "=="); - $s =~ /^\.le\./i && return (4, $COMPARISON, "<="); - $s =~ /^\.ge\./i && return (4, $COMPARISON, ">="); - $s =~ /^\.lt\./i && return (4, $COMPARISON, "<"); - $s =~ /^\.gt\./i && return (4, $COMPARISON, ">"); - $s =~ /^\.ne\./i && return (4, $COMPARISON, "/="); - $s =~ /^\.neqv\./i && return (6, $NEQV, ".neqv."); - $s =~ /^\.eqv\./i && return (5, $EQV, ".eqv."); - $s =~ /^\.and\./i && return (5, $AND, ".and."); - $s =~ /^\.or\./i && return (4, $OR, ".or."); - $s =~ /^\.not\./i && return (5, $NOT, ".not."); - $s =~ /^\*\*/ && return (2, $DBLASTERIK, "**"); - $s =~ /^\/\// && return (2, $DBLSLASH, "//"); - $s =~ /^\(\// && return (2, $LARRAY, "(/"); - $s =~ /^\/\)/ && return (2, $RARRAY, "/)"); - $c eq "," && return (1, $COMMA, ","); - $c eq "+" && return (1, $PLUS, "+"); - $c eq "-" && return (1, $MINUS, "-"); - $c eq "*" && return (1, $ASTERIK, "*"); - $c eq "/" && return (1, $SLASH, "/"); - $c eq "(" && return (1, $LPAREN, "("); - $c eq ")" && return (1, $RPAREN, ")"); - $c eq "%" && return (1, $PERCENT, "%"); - $c eq ":" && return (1, $COLON, ":"); - } - die "Lexer failed on `$s'"; - } -} - -##### -# Takes a string that consists entirely of an expression, and returns a -# reference to the parse tree it defines. -##### -sub parse_expr { - my ($s) = @_; - # print "parsing string: $s.\n"; - $expr_parse::left = $expr_parse::line = $s; - die "Expression `$expr_parse::line' has trailing garbage `$1$expr_parse::left'" - if yyparse () =~ /^s(.*)$/; - return $yyval; -} - -##### -# Takes a string that consists partly of an expression. (The first part -# is an expression.) 
Returns (parse tree ref, rest string, separator string). -##### -sub parse_part_as_expr { - my ($s) = @_; - # print "parsing part of string: $s.\n"; - $expr_parse::left = $expr_parse::line = $s; - if (yyparse () =~ /^s(.*)$/) { - return ($yyval, $expr_parse::left, $1); - } else { - return ($yyval); - } -} - -sub yyerror { - my ($s) = @_; - die "yyerror: $s during parsing of F90 code `$expr_parse::line'"; -} - -1; diff --git a/Tools/F_scripts/f90doc/f90doc b/Tools/F_scripts/f90doc/f90doc deleted file mode 100755 index 0afe6dafe73..00000000000 --- a/Tools/F_scripts/f90doc/f90doc +++ /dev/null @@ -1,160 +0,0 @@ -#!/usr/bin/env perl -eval 'exec perl $0 ${1+"$@"}' - if 0; -warn ("Perl 5 not detected, likely a big problem") if $] < 5.0; -warn "Less than Perl 5.003. You may witness mysterious segmentation faults." - if $] < 5.003; - -use strict; - -BEGIN { - my $zero = $0; - while (-l $zero) { - my $nextzero = readlink $zero; - if (substr ($nextzero, 0, 1) eq "/") { - $zero = $nextzero; - } elsif ($zero =~ m#^(.*)/#) { - $zero = "$1/$nextzero"; - } else { - $zero = $nextzero; - } - } - if ($zero =~ m#(.*)/\w+#) { - push @INC, "$1/../common/", $1; - } else { - push @INC, "../common/", "."; - } -} - -require "htmling.pl"; -require "stmts.pl"; -require "utils.pl"; -#require "expr_parse.pl"; -#require "typing.pl"; - -#################### - -if (! @ARGV) { - print <$part in module $1"); - } else { - push (@::see_list, "module $1"); - } - } elsif ($macro =~ /^author\s+/i) { - push (@::authors, $'); - } elsif ($macro =~ /^version\s+/i) { - die "Two versions in a single !! block" if $::version_num; - $::version_num = $'; - } else { - die "Unrecognized macro $macro"; - } -} diff --git a/Tools/F_scripts/f90doc/htmling.pl b/Tools/F_scripts/f90doc/htmling.pl deleted file mode 100644 index 956513244d9..00000000000 --- a/Tools/F_scripts/f90doc/htmling.pl +++ /dev/null @@ -1,376 +0,0 @@ -package htmling; - -use strict; - -### CONSTANTS -$htmling::dblspace = " "; -$htmling::indentspace = $htmling::dblspace x 2; -$htmling::headerspace = $htmling::indentspace; -$htmling::comment_indent = $htmling::indentspace x 2; - -### PUBLIC GLOBALS -$htmling::comments_type = "smart"; -$htmling::suppress_calls = 0; -$htmling::calls_make_links = 0; -$htmling::html_filenames_original_case = 0; - -### GLOBALS -$htmling::htmlfile = ""; -$htmling::indent = 0; - -# Return the name of the HTML file for the specified PROGRAM or MODULE -sub html_filename { - my ($name) = @_; - $name = lc $name unless $htmling::html_filenames_original_case; - return $name . ".html"; -} - -# This is the main calling point from f90doc. -# Takes all top-level objects: programs, subroutines, functions, and modules. -# Warns if given something else. -sub do_toplevel { - my ($top, $outfile) = @_; - - my $type = $top->{'type'}; - unless ($type eq 'module' || $type eq 'subroutine' || $type eq 'function' || - $type eq 'program') { - warn "Warning: Unrecognized top-level object $type will not be documented.\n"; - return; - } - - # A positive-length name. Necessary because programs may not have names. - if (defined $outfile) { - $htmling::htmlfile = $outfile; - } else { - $htmling::htmlfile = html_filename ( - ($top->{'name'} eq '' ? $type : $top->{'name'})); - } - print "Generating $htmling::htmlfile...\n"; - open OUT, ">$htmling::htmlfile"; - - print OUT "\n"; - print OUT "\n"; - print OUT " $type $top->{'name'} (generated by f90doc) \n"; - print OUT "\n"; - print OUT "
<h2>", ucfirst ($type), " $top->{'name'}</h2>\n";
-  print OUT "<pre>$type $top->{'name'}\n";
-
-  list_uses (@{$top->{'uses'}});
-  list_calls (1, keys %{$top->{'calls'}}) if exists $top->{'calls'};
-  list_html ("Types", map (($_->{'type'} eq "type" ? ($_) : ()), @{$top->{'ocontains'}}));
-  list_html ("Variables", map (($_->{'type'} eq "var" ? ($_) : ()), @{$top->{'ocontains'}}));
-  list_html ("Interfaces", map (($_->{'type'} eq "interface" ? ($_) : ()), @{$top->{'ocontains'}}));
-  list_html ("Subroutines and functions", map (($_->{'type'} eq "subroutine" || $_->{'type'} eq "function" ? ($_) : ()), @{$top->{'ocontains'}}));
-
-  print OUT "\nend $type $top->{'name'}\n";
-  do_comments ($top->{'comments'}, 1);
-
-  my @list;
-  @list = map (($_->{'type'} eq "type" ? ($_) : ()), @{$top->{'ocontains'}});
<hr><h2>Description of Types</h2>\n" if @list;
-  do_html (@list);
-  @list = map (($_->{'type'} eq "var" ? ($_) : ()), @{$top->{'ocontains'}});
-  print OUT "\n<hr><h2>Description of Variables</h2>\n" if @list;
-  do_html (@list);
-  @list = map (($_->{'type'} eq "interface" ? ($_) : ()), @{$top->{'ocontains'}});
-  print OUT "\n<hr><h2>Description of Interfaces</h2>\n" if @list;
-  do_html (@list);
-  @list = map (($_->{'type'} eq "subroutine" || $_->{'type'} eq "function" ? ($_) : ()), @{$top->{'ocontains'}});
-  print OUT "\n<hr><h2>Description of Subroutines and Functions</h2>\n" if @list;
-  do_html (@list);
-
-  print OUT "\n";
-  close OUT;
-}
-
-sub list_uses {
-   if (@_) {
-      print OUT "\n${htmling::indentspace}${htmling::headerspace}! Uses\n";
-      my ($use);
-      foreach $use (@_) {
-         my ($module, $extra) = @$use;
-         $extra = defined $extra ? ", $extra" : "";
-         print OUT "${htmling::indentspace}",
-            "use $module$extra\n";
-      }
-   }
-}
-
-sub list_calls {
-   return if $htmling::suppress_calls;
-   my ($big, @calls) = (@_);
-   if (@calls) {
-      @calls = sort @calls;
-      @calls = map { "<a href=\"" . html_filename ($_) . "\">$_</a>" } @calls
-         if $htmling::calls_make_links;
-      if ($big) {
-         print OUT join ("\n",
-            "\n${htmling::indentspace}${htmling::headerspace}! Calls",
-            (map { "${htmling::indentspace}call $_" } @calls), "");
-      } else {
-         print OUT "${htmling::indentspace}! Calls: ", join (", ", @calls), "\n";
-      }
-   }
-}
-
-sub list_html {
-   my ($title) = shift;
-
-   if (@_) {
-      print OUT "\n${htmling::indentspace}${htmling::headerspace}! $title\n";
-      my ($struct);
-      foreach $struct (@_) {
-         my ($name, $type) = (txt2html ($struct->{'name'}), $struct->{'type'});
-         my ($href) = "<a href=\"#${type}_" . lc ($name) . "\">$name</a>";
-         print OUT $htmling::indentspace;
-         if ($type eq "var") {
-            print OUT var2str ($struct, $href) . "\n";
-         } elsif ($type eq "subroutine" ||
-                  $type eq "function") {
-            print OUT join (" ", attriblist ($struct), "");
-            print OUT typing::type_to_f90 ($struct->{'rtype'}) . " "
-               if exists $struct->{'rtype'};
-            my $flag;
-            for $flag ('recursive', 'elemental', 'pure') {
-               print OUT "$flag " if $struct->{$flag};
-            }
-            print OUT "$type $href";
-            print OUT " (" . join (", ", @{$struct->{'parms'}}) . ")";
-            print OUT " result ($struct->{'result'})"
-               if exists $struct->{'result'} && !exists $struct->{'rtype'};
-            print OUT "\n";
-         } else {
-            print OUT join (" ", attriblist ($struct), "");
-            print OUT "$type $href\n";
-         }
-      }
-   }
-}
-
-sub do_html {
-   if (@_) {
-      my ($struct);
-
-      foreach $struct (@_) {
-         my ($name, $type) = (txt2html ($struct->{'name'}), $struct->{'type'});
-         if (! $htmling::indent) {
-            print OUT "<hr><h2><a name=\"${type}_" . lc ($name) . "\">$name</a></h2>\n";
-            print OUT "<pre>";
-         }
-
-         print OUT $htmling::indentspace x $htmling::indent;
-         if ($type eq "var") {
-             print OUT var2str ($struct) . "\n";
-         } elsif ($type eq "mprocedure") {
-             die "do_html: bare module procedure $struct->{'name'} (no enclosing module)"
-                 unless exists $struct->{'bind'};
-             print OUT
-                 "module procedure <a href=\"#$struct->{'bind'}->{'type'}_" .
-                 lc ($struct->{'name'}) . "\">$name</a>\n";
-         } elsif ($type eq "subroutine" || $type eq "function") {
-             print OUT join (" ", attriblist ($struct), "");
-             print OUT typing::type_to_f90 ($struct->{'rtype'}) . " "
-                 if exists $struct->{'rtype'} && !exists $struct->{'result'};
-             my $flag;
-             for $flag ('recursive', 'elemental', 'pure') {
-               print OUT "$flag " if $struct->{$flag};
-             }
-             print OUT "$type $name";
-             print OUT " (" . join (", ", @{$struct->{'parms'}}) . ")";
-             print OUT " result ($struct->{'result'})"
-               if exists $struct->{'result'};
-             print OUT "\n";
-         } else {
-             print OUT join (" ", attriblist ($struct), "");
-             print OUT "$type $name\n";
-         }
-
-         $htmling::indent++;
-
-         if ($type eq "var" || $type eq "mprocedure") {
-         } elsif ($type eq "type") {
-           print OUT $htmling::indentspace x $htmling::indent, "private\n"
-             if exists $struct->{'privatetype'};
-           print OUT $htmling::indentspace x $htmling::indent, "sequence\n"
-             if exists $struct->{'sequencetype'};
-           do_html (@{$struct->{'ocontains'}});
-         } elsif ($type eq "interface") {
-           do_html (@{$struct->{'ocontains'}});
-         } elsif ($type eq "subroutine" || $type eq "function") {
-           my @interest = @{$struct->{'parms'}};
-           push @interest, $struct->{'result'} if exists $struct->{'result'};
-           push @interest, $name
-             if $type eq "function" && !exists $struct->{'result'} &&
-               !exists $struct->{'rtype'};
-           my $arg;
-           foreach $arg (@interest) {
-             my (@things) = values %{$struct->{'contains'}->{lc $arg}};
-             die "Confused by/no declaration for parameter $arg of $type $name"
-               if scalar @things != 1;
-             do_html ($things[0]);
-           }
-         } else {
-           die "do: I don't know what a $type is";
-         }
-
-         list_calls (0, keys %{$struct->{'calls'}}) if exists $struct->{'calls'};
-
-         $htmling::indent--;
-
-         if ($type ne "var" && $type ne "mprocedure") {
-            print OUT $htmling::indentspace x $htmling::indent . "end $type $name\n";
-         }
-
-         do_comments ($struct->{'comments'}, ! $htmling::indent);
-      }
-   }
-}
-
-# Pass comments and a flag saying if you want to end the current <pre> block.
-sub do_comments {
-   my ($comments, $endpre) = @_;
-   if ($comments eq "") {
-      print OUT "</pre>\n" if $endpre;
-      return;
-   }
-
-   #print OUT "\n" unless $htmling::indent;
-
-   if ($htmling::comments_type eq "preformatted") {
-      my ($s) = $htmling::indentspace x $htmling::indent . $htmling::comment_indent;
-      $comments =~ s/^/$s/m if $htmling::indent;
-      $comments =~ s/^\n*//s;
-      $comments =~ s/\n*$//s;
-      print OUT $comments, "\n";
-      print OUT "</pre>\n" if $endpre;
-   } else {
-      print OUT "</pre>\n";
-      print OUT "<ul>\n" if $htmling::indent;
-      if ($htmling::comments_type eq "html") {
-      } elsif ($htmling::comments_type eq "smart") {
-         my @newcomments = ();
-         my $verbmode = 0;
-         my @listmode = ();
-         my $line;
-         foreach $line (split ("\n", $comments)) {
-            if ($verbmode) {
-               if ($line =~ /^>/) {
-                  warn "`$line' found while already in verbatim mode";
-                  substr ($line, 0, 1) = " ";
-                  push @newcomments, $line;
-               } elsif ($line =~ /^</) {
-                  $verbmode = 0;
-                  push @newcomments, "</pre>";
-               } elsif ($line =~ /^v/) {
-                  warn "`$line' found while already in verbatim mode";
-                  substr ($line, 0, 1) = " ";
-                  push @newcomments, $line;
-               } else {
-                  push @newcomments, $line;
-               }
-               next;
-            }
-
-            # _italic_ and *bold*
-            while ($line =~ /(\A|\W)_(\w|\w.*?\w)_(\Z|\W)/) {
-               my ($left, $mid, $right) = ("$`$1<em>", $2, "</em>$3$'");
-               $mid =~ s/_/ /g;
-               $line = $left . $mid . $right;
-            }
-            while ($line =~ /(\A|\W)\*(\w|\w.*?\w)\*(\Z|\W)/) {
-               my ($left, $mid, $right) = ("$`$1<strong>", $2, "</strong>$3$'");
-               $mid =~ s/\*/ /g;
-               $line = $left . $mid . $right;
-            }
-
-            # Lists
-            if ($line =~ /^( *)-/) {
-               if (! @listmode || length ($1) > $listmode[$#listmode]) {
-                  push @listmode, length $1;
-                  push @newcomments, $1 . "<ul>";
-               } else {
-                  while ($listmode[$#listmode] != length ($1)) {
-                     push @newcomments, " " x $listmode[$#listmode] . "</ul>";
-                     pop @listmode;
-                     die "Unindented to invalid position in `$line'"
-                        unless @listmode;
-                  }
-               }
-               push @newcomments, $1 . "<li>" . substr ($line, length ($&));
-            } elsif ($line =~ /^>/) {
-               #warn "Verbatim mode started in list mode" if @listmode;
-               $verbmode = 1;
-               substr ($line, 0, 1) = " ";
-               push @newcomments, "<pre>" . $line;
-               # Ignore $line =~ /^$line
-            } elsif ($line =~ /^\s*$/) {
-               push @newcomments, "<p>";
-            } elsif (@listmode) {
-               $line =~ /^( *)(\t?)/;
-               warn "Tabs have strange effects on indentation detection"
-                  if length ($2) > 0;
-               while (@listmode && $listmode[$#listmode] > length ($1)) {
-                  push @newcomments, " " x $listmode[$#listmode] . "</ul>";
-                  pop @listmode;
-               }
-               push @newcomments, $line;
-            } else {
-               push @newcomments, $line;
-            }
-         }
-         my $list;
-         foreach $list (@listmode) {
-            push @newcomments, " " x $list . "</ul>";
-         }
-         $comments = join ("\n", @newcomments);
-      } else {
-         die "Unsupported comments type `$htmling::comments_type'";
-      }
-      $comments =~ s/<p>\n(<p>\n)+/<p>\n/g;
-      $comments =~ s/<p>\n$//;
-      $comments =~ s/^<p>\n//;
-      $comments =~ s/<p>/<br>/g if $htmling::indent;
-      print OUT $comments . "\n";
-      print OUT "</ul>\n" if $htmling::indent;
-      print OUT "<pre>" unless $endpre;
    -   }
    -}
    -
    -sub var2str {
    -    my ($var, $href) = @_;
    -
    -    my ($typestr) = typing::type_to_f90 ($var->{'vartype'});
    -    my ($initial) = (!exists $var->{'initial'} ? ""
    -          : " $var->{'initop'} " . typing::expr_to_f90 ($var->{'initial'}));
    -    $href = txt2html ($var->{'name'}) unless $href;
    -    return $typestr . join (", ", "", attriblist ($var)) . " :: $href$initial";
    -}
    -
    -sub txt2html {
    -    my ($txt) = @_;
-    $txt =~ s/</&lt;/g;
-    $txt =~ s/>/&gt;/g;
    -    return $txt;
    -}
    -
    -sub attriblist {
    -    my ($struct) = @_;
    -    my @attribs = ();
    -
    -    push @attribs, $struct->{'vis'} if exists $struct->{'vis'};
    -    push @attribs, "optional" if exists $struct->{'optional'};
    -    push @attribs, @{$struct->{'tempattribs'}}
    -        if exists $struct->{'tempattribs'};
    -
    -    return @attribs;
    -}
    -
    -1;
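The "smart" comment markup that htmling.pl's do_comments implements is easiest to see in isolation. The following sketch is new code, not part of the deleted file: it applies the same emphasis rewriting loop, where the <em>/<strong> output tags are assumptions reconstructed from the stripped markup above.

    #!/usr/bin/env perl
    # Hedged sketch: the _italic_ / *bold* rewriting from do_comments,
    # reduced to a single helper.  Tag names are assumed, not confirmed.
    use strict;
    use warnings;

    sub markup_line {
        my ($line) = @_;
        while ($line =~ /(\A|\W)_(\w|\w.*?\w)_(\Z|\W)/) {
            my ($left, $mid, $right) = ("$`$1<em>", $2, "</em>$3$'");
            $mid =~ s/_/ /g;               # interior underscores become spaces
            $line = $left . $mid . $right;
        }
        while ($line =~ /(\A|\W)\*(\w|\w.*?\w)\*(\Z|\W)/) {
            my ($left, $mid, $right) = ("$`$1<strong>", $2, "</strong>$3$'");
            $mid =~ s/\*/ /g;
            $line = $left . $mid . $right;
        }
        return $line;
    }

    print markup_line("this is _really_ a *bold* claim"), "\n";
    # prints: this is <em>really</em> a <strong>bold</strong> claim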
    diff --git a/Tools/F_scripts/f90doc/stmts.pl b/Tools/F_scripts/f90doc/stmts.pl
    deleted file mode 100644
    index 83d20a300af..00000000000
    --- a/Tools/F_scripts/f90doc/stmts.pl
    +++ /dev/null
    @@ -1,891 +0,0 @@
    -package stmts;
    -
    -use strict;
    -
    -require "expr_parse.pl";
    -require "typing.pl";
    -require "utils.pl";
    -
    -#########################################################################
    -# PUBLIC GLOBALS
    -
    -# Set to a reference to a routine to take !! comments if !! comments are
    -# to be caught.
    -$stmts::bangbang = "";
    -
    -# Set to a reference to a routine to return accumulated comments if !! comments
    -# are caught.  You should reset them after each time you call read_line or
    -# read_stmt.
    -$stmts::comments = "";
    -
    -# Set this to disable warnings.  Don't use this for a compiler!  Suitable for
    -# something like f90doc though.  This shouldn't be used once stmts supports
    -# all Fortran 90 statements and attributes; until then, it's pretty much
-# needed; afterwards, it should be removed.
-$stmts::disable_warns = 0;
    -
    -# Set this to use fixed-form Fortran, like good old Fortran 77.
    -$stmts::fixed_form = 0;
    -
    -#########################################################################
    -# PRIVATE GLOBALS
    -
    -# A "left-over" piece of a statement is stored here when semi-colons are
    -# encountered.
    -$stmts::leftover = "";
    -
    -# Number of opened files.
    -$stmts::nfile = 0;
    -
    -# List of string's values.
    -@stmts::strings = ();
    -
    -# List of structure pointers that we're currently nested in.
    -# topnest stores the top of the stack.
    -@stmts::nesting = ();
    -$stmts::topnest = undef;
    -
    -# List of structure pointers that we're currently nested in, but for a
    -# specified type.
    -%stmts::nesting_by = ();
    -
    -#########################################################################
    -# ROUTINES
    -
    -#####
    -# Reads an entire file, and returns all the top-level structures found.
    -# If specified, a given function will be called after every statement
    -# (usually this is for resetting !! comments and such).
    -#####
    -sub read_file {
    -  my ($filename, $every_stmt) = @_;
    -  stmts::open_file ($filename);
    -
    -  my ($stmt, $struct, @rval);
    -  my @toplevel = ();
    -  while ((@rval = stmts::read_stmt ()) [0]) {
    -    push @toplevel, $rval[1] if !defined $stmts::topnest && ref $rval[1];
    -    &$every_stmt () if defined $every_stmt;
    -  }
    -
    -  return @toplevel;
    -}
    -
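For orientation, here is a hedged usage sketch (mine, not from the deleted f90doc driver) of how a caller is expected to drive read_file. The file name and both handlers are hypothetical; the two package variables are the hooks documented at the top of this file.

    # Hedged sketch: driving stmts::read_file with the !!-comment hooks.
    require "stmts.pl";

    my @saved;                                           # accumulated !! text
    $stmts::bangbang = sub { push @saved, $_[0] };       # catch "!!" comments
    $stmts::comments = sub { join "\n", splice @saved }; # hand them back once

    my @toplevel = stmts::read_file ("example.f90",      # hypothetical input
                                     sub { });           # per-statement hook
    foreach my $unit (@toplevel) {
        print "$unit->{'type'} $unit->{'name'}\n";
    }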
    -#####
    -# Starts reading the specified filename.
    -#####
    -sub open_file {
    -   my ($filename) = @_;
    -   $stmts::FILE = "";
    -
    -   open IN, $filename
    -     or die "Couldn't open $filename";
    -   $stmts::{'FILE' . $stmts::nfile} = $stmts::{'IN'};
    -}
    -
    -#####
    -# Cleans up from reading the current file.
    -# This is automatically called by read_line, so most don't have to worry
    -# about it.
    -# Returns false if there are no files left.
    -#####
    -sub close_file {
    -   close IN;
    -   $stmts::nfile--;
    -   if ($stmts::nfile > 0) {
    -      # CHECK--does this still do the desired thing, in light of open_file?
    -      $stmts::{'IN'} = $stmts::{'FILE' . $stmts::nfile};
    -      return 1;
    -   } else {
    -      # Clean up strings.
    -      @stmts::strings = ();
    -      return 0;
    -   }
    -}
    -
    -#####
    -# Reads a line of Fortran 90 doing whatever it takes.  This may involve
    -# reading multiple lines from the current file, walking into files, etc.
    -# INCLUDE is parsed at this level.
    -# Note that the returned string may have various cases (lc isn't called).
    -#####
    -sub read_line {
    -
    -ALLOVERAGAIN:
    -  my $line;
    -  if ($stmts::leftover ne '') {
    -    $line = $stmts::leftover;
    -    $stmts::leftover = '';
    -  } else {
-    $line = <IN>;
    -    until (defined $line) {
    -      return "" unless close_file ();
-      $line = <IN>;
    -    }
    -    chomp $line;
    -
    -    substr ($line, 0, 1) = '!' if $stmts::fixed_form && $line =~ /^\S/;
    -  }
    -
    -  # This is used for fixed-form continuations.
    -  my $lastlen = length $line;
    -
    -  my $continue = 0;
    -
    -  while (1) {
    -    # Grab doubled comments (!!) if requested.
    -    if ($stmts::bangbang && $line =~ /^([^"'!]|('[^']*')|("[^"]*"))*(!!.*)$/) {
    -      $line = substr ($line, 0, length ($line) - length ($4));
    -      &$stmts::bangbang ($4);
    -    }
    -
    -    # Delete comments.
    -    elsif ($line =~ /^([^"'!]|(\'[^']*')|("[^"]*"))*(!.*)$/) {
    -      $line = substr ($line, 0, length ($line) - length ($4));
    -    }
    -
    -    # Fixed-form continuations.
    -    if ($stmts::fixed_form) {
    -
    -      # Check next line for continuation mark.
-      $stmts::leftover = <IN>;
    -      $stmts::leftover = '' unless defined $stmts::leftover;
    -      chomp $stmts::leftover;
    -      substr ($stmts::leftover, 0, 1) = '!' if $stmts::leftover =~ /^\S/;
    -      if ($stmts::leftover =~ /^\s....\S/) {
    -
    -        # Pad previous line with spaces if it had less than 72 characters.
    -        $line .= ' ' x (72-$lastlen) if $lastlen < 72;
    -
    -        # Add next (continuation) line to the line.
    -        $line .= substr ($stmts::leftover, 6);
    -        $lastlen = length $stmts::leftover;
    -        
    -        # Continue on to check the next line.
    -        $stmts::leftover = '';
    -        next;
    -      }
    -      
    -    # Free-form continuations.
    -    } elsif ($continue || $line =~ /&\s*$/) {
    -      $line = $` if $line =~ /&\s*$/;
-      my $rest = <IN>;
    -      chomp $rest;
    -      $rest = $' if $rest =~ /^\s*&/;
    -      $line = "$line$rest";
    -      # Blank lines don't stop the continuation.
    -      $continue = ($rest =~ /^\s*(?:!.*)?$/);
    -      next;
    -    }
    -
    -    last;
    -  }
    -
    -  # Semicolons.
    -  if ($line =~ /^([^;]*);(.*)$/) {
    -    $line = $1;
    -    if ($stmts::leftover eq '') {
    -      $stmts::leftover = $2;
    -    } else {
    -      $stmts::leftover .= ";$2";
    -    }
    -  }
    -
    -  # Replace strings to avoid confusion.
    -  my @quotes;
    -  while ($line =~ / " ([^"]|"")* " | ' ([^']|'')* ' /xg) {
    -    push @quotes, [length $`, length $&, $&];
    -  }
    -  for my $quote (reverse @quotes) {
    -    ## Process in reverse order so that $start is preserved despite replacement
    -    my ($start, $length, $string) = @$quote;
    -    push @stmts::strings, $string;
    -    substr ($line, $start, $length) = "\'" . $#stmts::strings . "\'";
    -  }
    -
    -  # Get rid of spaces on either end.
    -  $line = utils::trim ($line);
    -
    -  goto ALLOVERAGAIN if $line eq '';
    -
    -  #print "read line `$line'\n";
    -
    -  return $line;
    -}
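The free-form continuation rule above is the subtle part of read_line. This sketch is mine, not from the deleted sources: it replays the same ampersand joining over an in-memory array instead of a filehandle (the blank-line handling via $continue is omitted for brevity).

    # Hedged sketch: free-form '&' continuation joining from read_line.
    my @src  = ("x = 1 + &", "    & 2 + &", "    & 3");
    my $line = shift @src;
    while ($line =~ /&\s*$/) {
        $line = $`;                     # drop the trailing ampersand
        my $rest = shift @src;
        $rest = $' if $rest =~ /^\s*&/; # drop a leading ampersand, if any
        $line .= $rest;
    }
    print "$line\n";                    # x = 1 +  2 +  3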
    -
    -#####
    -# Returns the physical value for the given string number.
    -#####
    -sub get_string {
    -   my ($n) = @_;
    -   return $stmts::strings[$n];
    -}
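The string masking that read_line performs, and that get_string undoes, is worth a standalone run. This is a self-contained replay of the loop above (the sample Fortran line is mine); each literal is swapped for a quoted index so later regexes never trip over quote characters.

    # Hedged sketch: the string-masking scheme from read_line/get_string.
    my (@quotes, @strings);
    my $line = q{print *, 'don''t panic', "ok!"};
    while ($line =~ / " ([^"]|"")* " | ' ([^']|'')* ' /xg) {
        push @quotes, [length $`, length $&, $&];   # position, length, text
    }
    for my $quote (reverse @quotes) {   # reverse keeps earlier offsets valid
        my ($start, $length, $string) = @$quote;
        push @strings, $string;
        substr ($line, $start, $length) = "'" . $#strings . "'";
    }
    print "$line\n";    # print *, '1', '0'  -- placeholders, not literals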
    -
    -#####
    -# Reads a Fortran 90 statement from the current input.
    -# Checks for proper nesting, etc., and keeps tracks of what's in what.
    -# Possible results:
    -#    ('?', $the_line)
    -#    ('program', \%structure)
    -#    ('endprogram', \%structure)
    -#    ('module', \%structure)
    -#    ('endmodule', \%structure)
    -#    ('subroutine', \%structure)
    -#    ('endsubroutine', \%structure)
    -#    ('function', \%structure)
    -#    ('endfunction', \%structure)
    -#    ('type', \%structure)
    -#    ('endtype', \%structure)
    -#    ('interface', \%structure)
    -#    ('endinterface', \%structure)
    -#    ('var', \%struct1, \%struct2, ...)
    -#    ('contains', \%parent)
    -#    ('public', $name1, $name2, ...)          empty means global default
    -#    ('private', $name1, $name2, ...)         empty means global default
    -#    ('optional', $name1, $name2, ...)
    -#    ('call', $arg1, $arg2, ...)              currently args are unparsed
    -#####
    -sub read_stmt {
    -   my ($line) = read_line ();
    -   if (! $line) {
    -      die "File ended while still nested" if @stmts::nesting;
    -      return ("", "");
    -   }
    -
    -   # MODULE PROCEDURE (must be before module)
    -   if ($line =~ /^module\s+procedure\s+(\w.*)$/i) {
    -      die "module procedure outside of interface block" unless defined $stmts::topnest && $stmts::topnest->{'type'} eq "interface" && $stmts::topnest->{'name'} ne "";
    -      my (@list) = split (/\s*,\s*/, utils::trim ($1));
    -      my ($p);
    -      foreach $p (@list) {
    -         die "Invalid module procedure `$p'" unless $p =~ /^\w+$/;
    -         new_struct ({
    -            'type'   => "mprocedure",
    -            'name'   => $p,
    -            hashed_comments ()
    -         });
    -      }
    -      return ("mprocedure", @list);
    -   }
    -
    -   # MODULE/PROGRAM
    -   elsif ($line =~ /^(module|program)(?:\s+(\w+))?$/i) {
    -      die "$1 begun not at top level" if defined $stmts::topnest;
    -      return new_nest ({
    -         'type' => lc $1,
    -         'name' => (defined $2 ? $2 : ''),
    -         hashed_comments ()
    -      });
    -   }
    -
    -   # END MODULE/SUBROUTINE/FUNCTION/PROGRAM/TYPE/INTERFACE, or general END
    -   elsif ($line =~ /^end\s*(?:(module|subroutine|function|program|type|interface)(?:\s+(\w+))?)?$/i) {
    -      die "END statement outside of any nesting" unless defined $stmts::topnest;
    -      my $top = $stmts::topnest;
    -
    -      # We do some special "fixing up" for modules, which resolves named
    -      # references (module procedures) and computes publicity.
    -      #
    -      # Note that end_nest will ensure that the type of thing ended matches
    -      # the thing the user says it is ending, so we don't have to worry about
    -      # that.
    -      if ($top->{'type'} eq "module") {
    -
    -        # Set publicity (visibility) of objects within the module.
    -
    -        # First, the explicitly set ones.
    -        my $name;
    -        foreach $name (@{$top->{'publiclist'}}) {
    -          do_attrib ($name, "vis", 'public', "visibility");
    -        }
    -        foreach $name (@{$top->{'privatelist'}}) {
    -          do_attrib ($name, "vis", 'private', "visibility");
    -        }
    -
    -        # Second, the globally set ones (those obeying the default).
    -        my $obj;
    -        $top->{'defaultvis'} = "public" unless exists $top->{'defaultvis'};
    -        foreach $obj (@{$top->{'ocontains'}}) {
    -          $obj->{'vis'} = $top->{'defaultvis'} unless exists $obj->{'vis'};
    -        }
    -
    -        # Traverse (arbitrarily deeply) nested structures.
    -        sub traverse {
    -          my ($node) = @_;
    -          my $top = $stmts::topnest;   # HAVE NO IDEA WHY THIS IS NEEDED
    -          
    -          # Graduate nested MODULE PROCEDURE (mprocedure) to point to the
    -          # appropriate thing (either a function or a subroutine with that
    -          # name).
    -          if ($node->{'type'} eq "mprocedure") {
    -            die "Couldn't find module procedure $node->{'name'} (nothing with that name in module $top->{'name'})"
    -              unless exists $top->{'contains'}->{lc $node->{'name'}};
    -            
    -            my ($possibles) =
    -              $top->{'contains'}->{lc $node->{'name'}};
    -            die "Couldn't find module procedure $node->{'name'} in module $top->{'name'} (wrong type)"
    -              if !exists $possibles->{'subroutine'}
    -              && !exists $possibles->{'function'};
    -            die "Found both a subroutine and function to match module procedure $node->{'name'} in module $top->{'name'}"
    -              if exists $possibles->{'subroutine'}
    -              && exists $possibles->{'function'};
    -            
    -            if (exists $possibles->{'subroutine'}) {
    -              $node->{'bind'} = $possibles->{'subroutine'};
    -            } else {
    -              $node->{'bind'} = $possibles->{'function'};
    -            }
    -          }
    -
    -          # Recurse.
    -          map { traverse ($_) } @{$node->{'ocontains'}}
    -          if exists $node->{'ocontains'};
    -        }
    -        map { traverse ($_) } @{$top->{'ocontains'}};
    -      }
    -
    -      my @return_val = end_nest ($1, $2);
    -
    -      # Subroutines and functions in interface blocks must be noted at the
    -      # top level.  We do this with "interface" structures with the names
    -      # of the actual contained routines (unless this is already the
    -      # case).  Make sense?
    -      if ($top->{'type'} eq "interface" && $top->{'name'} eq "") {
    -          my $sub;
    -          foreach $sub (@{$top->{'ocontains'}}) {
    -              next if $sub->{'name'} eq $top->{'name'} ||
    -                      $sub->{'type'} eq "mprocedure";
    -
    -              my %copy = %$top;
    -              $copy{'name'} = $sub->{'name'};
    -              new_nest (\%copy);
    -              my $old_within = $sub->{'within'};
    -              new_struct ($sub);
    -              $sub->{'within'} = $old_within;
    -              end_nest ('interface', $sub->{'name'});
    -          }
    -      }
    -
    -      return @return_val;
    -   }
    -
    -   # SUBROUTINE/FUNCTION
    -   elsif ($line =~ /^(?:(.+?)\s+)?(subroutine|function)\s+(\w+)\s*(\([^()]*\))?(?:\s*result\s*\(\s*(\w+)\s*\))?$/i) {
    -      my ($type, $name, $parmstr, $rtype, $result) =
    -         (lc $2, $3,    $4,       $1,     $5);
    -
    -      die "Start of $type $name before `contains' section of $stmts::topnest->{'type'} $stmts::topnest->{'name'}"
    -          if defined $stmts::topnest && ! $stmts::topnest->{'incontains'} &&
    -             $stmts::topnest->{'type'} ne "interface";
    -      if (exists $stmts::nesting_by{'subroutine'} ||
    -          exists $stmts::nesting_by{'function'}) {
    -         my $n = 0;
    -         $n += scalar @{$stmts::nesting_by{'subroutine'}}
    -            if exists $stmts::nesting_by{'subroutine'};
    -         $n += scalar @{$stmts::nesting_by{'function'}}
    -            if exists $stmts::nesting_by{'function'};
    -#FIXME  #die "Routine nested in routine nested in routine" if $n > 1;
    -      }
    -
    -      $parmstr = "()" unless defined $parmstr;
    -      $parmstr = utils::trim (substr ($parmstr, 1, length ($parmstr) - 2));
    -      my (@parms);
    -      if ($parmstr) {
    -         @parms = split (/\s*,\s*/, $parmstr);
    -         my ($parm);
    -         foreach $parm (@parms) {
    -            die "Parameter `$parm' is not just a word or *"
    -              unless $parm =~ /^\w+|\*$/;
-            ## * as a final argument allows the caller to specify a statement
-            ## label to jump to, as an alternative return address.  (Legacy Fortran!)
    -            ## Thanks to Art Olin for this info.
    -         }
    -      } else {
    -         @parms = ();
    -      }
    -
    -      my $struct = {
    -         'type'      => $type,
    -         'name'      => $name,
    -         'parms'     => \@parms,
    -         hashed_comments ()
    -      };
    -      new_nest ($struct);
    -
    -      $struct->{'result'} = $result if defined $result;
    -
    -      $rtype = "" unless defined $rtype;
    -      while ($rtype =~ /(?:^|\s+)(recursive|pure|elemental)$/i ||
    -             $rtype =~ /^(recursive|pure|elemental)(?:\s+|$)/i) {
    -        $rtype = $` . $'; # actually whichever is not blank
    -        $struct->{lc $1} = 1;
    -      }
    -      if ($rtype ne '') {
    -        $struct->{'rtype'} = parse_type ($rtype);
    -        new_struct ({
    -          'type'        => 'var',
    -          'name'        => (defined $result ? $result : $name),
    -          'vartype'     => $struct->{'rtype'},
    -          'comments'    => ''
    -        });
    -      }
    -
    -      return ($type, $struct);
    -   }
    -
    -   # TYPE definition (must go before variable declarations)
    -   elsif ($line =~ /^type(?:\s+|\s*(,.*)?::\s*)(\w+)$/i) {
    -     my $struct = new_nest ({
    -       'type' => 'type',
    -       'name' => $2,
    -       hashed_comments ()
    -     });
    -     if (defined $1) {
    -       my $attrib = utils::trim (substr ($1, 1));
    -       if ($attrib =~ /^(public|private)$/i) {
    -         $struct->{'vis'} = lc $attrib;
    -       } elsif ($attrib) {
    -         warn "Invalid attribute `$attrib' for derived-type declaration--should be just public or private";
    -       }
    -     }
    -     return $struct;
    -   }
    -
    -   # INTERFACE block (for overloading) or statement (for definition of external)
    -   elsif ($line =~ /^interface(?:\s+(\S.+))?$/i) {
    -       return new_nest ({
    -           'type' => 'interface',
    -           'name' => (defined $1 ? $1 : ""),
    -           hashed_comments ()
    -       });
    -   }
    -
    -   # CONTAINS
    -   elsif ($line =~ /^contains$/i) {
    -      die "`contains' found at top level" unless defined $stmts::topnest;
    -      die "`contains' found in $stmts::topnest->{'type'} $stmts::topnest->{'name'}" unless exists $stmts::topnest->{'incontains'};
    -      die "Multiple `contains' found in same scope"
    -         if $stmts::topnest->{'incontains'};
    -      die "`contains' found in interface definition"
    -         if $stmts::topnest->{'interface'};
    -      $stmts::topnest->{'incontains'} = 1;
    -      return ("contains", $stmts::topnest);
    -   }
    -
    -   # PUBLIC/PRIVATE/SEQUENCE
    -   elsif ($line =~ /^(public|private|sequence)(?=\s+[^=(]|::|$)(\s*::\s*)?/i) {
    -     my ($what, $rest) = (lc $1, $');
    -
    -     if (defined $stmts::topnest && $stmts::topnest->{'type'} eq "type") {
    -       die "public statement not allowed in a type declaration"
    -         if $what eq 'public';
    -       die "$1 cannot be qualified inside type declaration" if $rest;
    -       $stmts::topnest->{$what . 'type'} = 1;
    -       return ($what);
    -     } else {
    -       die "sequence statement only allowed immediately inside type declaration"
    -         if $1 eq 'sequence';
    -
    -       die "$1 statement not immediately inside a module or type declaration"
    -         unless defined $stmts::topnest && $stmts::topnest->{'type'} eq "module";
    -       if ($rest eq "") {  # Unqualified
    -         die "Unqualified $what in addition to unqualified " .
    -           $stmts::topnest->{'defaultvis'}
    -         if exists $stmts::topnest->{'defaultvis'};
    -         $stmts::topnest->{'defaultvis'} = $what;
    -         return ($what);
    -         
    -       } else {  # Qualified
    -         my @namelist = map {
    -           die "Invalid name `$_' specified in $what statement"
    -             unless /^\s*(\w+)(?:\s*(\([^()]+\)))?\s*$/i;
    -           $1 . (defined $2 ? $2 : "");
    -         } (split ',', $rest);
    -         push @{$stmts::topnest->{"${what}list"}}, @namelist;
    -         return ($what, @namelist);
    -       }
    -     }
    -   }
    -
    -    # OPTIONAL
    -    elsif ($line =~ /^optional(\s+|\s*::\s*)((\w|\s|,)+)$/i) {
    -        my $name;
    -        my @namelist = split (/\s*,\s*/, utils::trim ($2));
    -        foreach $name (@namelist) {
    -            do_attrib ($name, "optional", 1, "optional attribute");
    -        }
    -        return ('optional', @namelist);
    -    }
    -
    -   # Variable declarations
    -   elsif ($line =~ /^(integer|real|double\s*precision|character|complex|logical|type)\s*(\(|\s\w|[:,*])/i) {
    -      my ($vartype, $rest) = parse_part_as_type ($line);
    -      my (@attribs, @right);
    -      if ($rest =~ /^(.*)\:\:(.*)/) {
    -         my ($a, $b) = ($1, $2);
    -         @attribs = map (( utils::trim ($_) ), utils::balsplit (",", $a));
    -         @right = map (( utils::trim ($_) ), utils::balsplit (",", $b));
    -      } else {
    -         @attribs = ();
    -         @right = map (( &utils::trim ($_) ), utils::balsplit (",", $rest));
    -      }
    -      my ($r, @structs);
    -      foreach $r (@right) {
    -          my ($rl, $rassign) = &utils::balsplit ("=", $r);
    -          my ($rll, $starpart) = &utils::balsplit ("*", $rl);
    -          if (defined $starpart) {
    -            die "Sorry, I don't support 'character var*kind' yet; use 'character*kind var' instead";
    -          }
    -          $rll =~ /^ (\w+) (\s* \(.*\))? \s* $/x
    -              or die "Invalid variable declaration `$rll'";
    -          my ($name, $dimension) = ($1, $2);
    -          my ($initop, $initial);
    -          if (defined $rassign) {
-            # The leading = was already split off by balsplit; a leading > here means =>.
    -            $rassign =~ /^ (>?) \s* (.*) $/x
    -              or die "Invalid variable initialization `= $rassign'";
    -            ($initop, $initial) = ("=" . $1, $2);
    -          }
    -
    -          my $struct;
    -          $struct = {
    -              'type'        => 'var',
    -              'name'        => $name,
    -              'vartype'     => $vartype,
    -              hashed_comments ()
    -          };
    -          if (defined $initial) {
    -            $struct->{'initop'} = $initop;
    -            $struct->{'initial'} = expr_parse::parse_expr ($initial);
    -          }
    -          new_struct ($struct);
    -          push @structs, $struct;
    -
    -          my @attribs_copy = @attribs;
    -          push @attribs_copy, "dimension $dimension" if defined $dimension;
    -
    -          my ($attrib, @tempattribs);
    -          foreach $attrib (@attribs_copy) {
    -              if ($attrib =~ /^(public|private)$/i) {
    -                  $attrib = lc $attrib;
    -                  $struct->{'vis'} = $attrib;
    -              } elsif ($attrib =~ /^optional$/i) {
    -                  $attrib = lc $attrib;
    -                  $struct->{$attrib} = 1;
    -              } elsif ($attrib) {
    -                  warn "Unrecognized attribute `$attrib'"
    -                      unless $stmts::disable_warns;
    -                  push @tempattribs, $attrib;
    -              }
    -          }
    -
    -          $struct->{'tempattribs'} = \@tempattribs;
    -      }
    -
    -      return ('var', @structs);
    -   }
    -
    -   # USE
    -   elsif ($line =~ /^use\s+(\w+)($|,\s*)/i) {
    -      die "`use' found at top level" unless defined $stmts::topnest;
    -      die "`use' found in $stmts::topnest->{'type'} $stmts::topnest->{'name'}" unless exists $stmts::topnest->{'uses'};
    -      my $extra = length $' ? $' : undef;
    -      push @{$stmts::topnest->{'uses'}}, [$1, $extra];
    -
    -      return ('use', $1, $extra);
    -   }
    -   
    -   # CALL or IF (...) CALL [hack--xxx]
    -   elsif ($line =~ /^(?:if\s*\(.*\)\s*)?call\s+(\w+)\s*(?:\(\s*(.*?)\s*\))?$/i) {
    -      die "`call' found at top level" unless defined $stmts::topnest;
    -      die "`call' found in $stmts::topnest->{'type'} $stmts::topnest->{'name'}" unless exists $stmts::topnest->{'calls'};
    -      $stmts::topnest->{'calls'}->{$1} = 1;
    -      my @args = ();
    -      @args = split /\s*,\s*/, $2 if defined $2;
    -      return ('call', @args);
    -   }
    -   
    -   # Unrecognized statement
    -   else {
    -      if ($line =~ /^\w+/) {
    -         warn "Unrecognized statement beginning with word $&" unless $stmts::disable_warns;
    -      } else {
    -         warn "Unrecognized statement" unless $stmts::disable_warns;
    -      }
    -      return ('?', $line);
    -   }
    -}
    -
    -#####
    -# Returns a list that would fit right into a hash table you're making.  If
    -# there are no comments, returns the empty list.  The entry is called
    -# 'comments'.
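     -# A minimal sketch of a call (illustrative, mirroring new_struct below):
     -#     my $struct = { 'type' => 'var', hashed_comments () };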
    -#####
    -sub hashed_comments {
    -   if ($stmts::comments) {
    -      return ( 'comments', &$stmts::comments () );
    -   } else {
    -      return ();
    -   }
    -}
    -
    -#####
    -# Makes note of a new structure.  Called by new_nest, for example.
    -#####
    -sub new_struct {
    -   my ($struct) = @_;
    -   my $type = $struct->{'type'};
    -
    -   die "Basic structure must be found at a nesting level"
    -     unless defined $stmts::topnest;
    -
    -   if (exists ($stmts::topnest->{'contains'}->{lc $struct->{'name'}})) {
    -      die "Redefinition of $type $struct->{'name'} in $stmts::topnest->{'type'} $stmts::topnest->{'name'}"
    -         if exists ($stmts::topnest->{'contains'}->{lc $struct->{'name'}}->{$type});
    -      $stmts::topnest->{'contains'}->{lc $struct->{'name'}}->{$type} = $struct;
    -   } else {
    -      $stmts::topnest->{'contains'}->{lc $struct->{'name'}} =
    -         { $type => $struct };
    -   }
    -   push @{$stmts::topnest->{'ocontains'}}, $struct;
    -   $struct->{'within'} = $stmts::topnest;
    -}
    -
    -#####
    -# Starts a new nesting level represented by the given structure.  The
    -# structure must define the 'type' and 'name' entries.  You should not
    -# define the 'contains' or 'defaultvis' entry.
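     -# A minimal sketch of a call (illustrative, not from the original source):
     -#     new_nest ({ 'type' => 'module', 'name' => 'my_mod' });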
    -#####
    -sub new_nest {
    -   my ($struct) = @_;
    -   my ($type) = $struct->{'type'};
    -
    -   $struct->{'contains'} = { };
    -   $struct->{'ocontains'} = [ ];
    -
    -   # Program unit
    -   if ($type eq "subroutine" || $type eq "function" || $type eq "module" || $type eq "program") {
    -     $struct->{'incontains'} = 0;
    -     $struct->{'uses'} = [ ];
    -     $struct->{'interface'} = 0 if $type eq "subroutine" || $type eq "function";
    -   }
    -
    -   # Program unit with code
    -   if ($type eq "subroutine" || $type eq "function" || $type eq "program") {
    -     $struct->{'calls'} = { };
    -   }
    -
    -   if (defined $stmts::topnest) {
    -      my ($toptype) = $stmts::topnest->{'type'};
    -      if ($toptype eq "interface" && ($struct->{'type'} eq "subroutine" || $struct->{'type'} eq "function")) {
    -         $struct->{'interface'} = 1;
    -      } else {
    -         die "Nesting in $toptype not allowed" unless $toptype eq "subroutine" || $toptype eq "function" || $toptype eq "module" || $toptype eq "program";
    -      }
    -      new_struct ($struct) unless $struct->{'name'} eq "";
    -   }
    -   push @stmts::nesting, $struct;
    -   if (exists ($stmts::nesting_by{$type})) {
    -      push @{$stmts::nesting_by{$type}}, $struct;
    -   } else {
    -      $stmts::nesting_by{$type} = [ $struct ];
    -   }
    -   $stmts::topnest = $struct;
    -   return ( $type, $struct );
    -}
    -
    -#####
    -# Ends the current nesting level.  Optionally, you can pass the 'type' that
    -# it's supposed to be as the first argument.  Optionally, you can pass the
    -# 'name' it should have after that (as the second argument).
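     -# E.g. (illustrative): end_nest ("module", "my_mod") checks that the level
     -# being closed really is module my_mod before popping it.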
    -#####
    -sub end_nest {
    -  my ($type, $name) = @_;
    -  $type = lc $type if defined $type;
    -  unless (defined $stmts::topnest) {
    -    if (defined $name && defined $type) {
    -      die "Ended $type $name at top level";
    -    } elsif (defined $type) {
    -      die "Ended unnamed $type at top level";
    -    } else {
    -      die "END statement at top level";
    -    }
    -  }
    -  my ($struct) = pop @stmts::nesting;
    -  die "Ended $type while in $struct->{'type'} $struct->{'name'}"
    -    if defined $type && $type ne $struct->{'type'};
    -  die "Ended $name while in $struct->{'type'} $struct->{'name'}"
    -    if defined $name && $name !~ /^\Q$struct->{'name'}\E$/i;
    -  if (@stmts::nesting) {
    -    $stmts::topnest = $stmts::nesting[$#stmts::nesting];
    -  } else {
    -    $stmts::topnest = undef;
    -  }
    -  pop @{$stmts::nesting_by{$struct->{'type'}}};
    -  return ( "end" . (defined $type ? $type : ''), $struct );
    -}
    -
    -#####
    -# Parses the basic type that prefixes the given string.
    -# Returns (parsed type, string portion remaining).
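     -# E.g. (illustrative): given "integer(kind=4) x", this returns the parsed
     -# integer type and the remaining string "x".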
    -#####
    -sub parse_part_as_type {
    -  my ($str) = @_;
    -
    -  $str =~ /^integer|real|double\s*precision|character|complex|logical|type/i
    -    or die "parse_part_as_type: Invalid input `$str'";
    -  my ($base, $rest) = ($&, $');
    -
    -  my $level = 0;
    -  ## Wait till we are outside of all parens and see a letter, colon, or comma.
    -  while ($rest =~ /[()a-zA-Z_:,]/g) {
    -    if ($& eq '(') {
    -      $level++;
    -    } elsif ($& eq ')') {
    -      $level--;
    -      die "Unbalanced parens (too many )'s)" if $level < 0;
    -    } elsif ($level == 0) {
    -      return (parse_type ($base . $`), $& . $');
    -    }
    -  }
    -  
    -  die "Couldn't split into type and rest for `$str'";
    -
    -# Some old, presumably less-efficient code:
    -#  my ($level, $len) = (0, length ($str));
    -#  my ($i, $c);
    -#  for ($i = length ($&); $i < $len; $i++) {
    -#    $c = substr ($str, $i, 1);
    -#    if ($c eq "(") {
    -#      $level++;
    -#    } elsif ($c eq ")") {
    -#      $level--;
    -#      die "Unbalanced parens (too many )'s)" if $level < 0;
    -#    } elsif ($level == 0 && $c =~ /^\w|:|,$/) {
    -#      last;
    -#    }
    -#  }
    -#  return (parse_type (substr ($str, 0, $i)), substr ($str, $i));
    -}
    -
    -#####
    -# Parses a basic type, creating a type structure for it:
    -#     integer [( [kind=] kind_val )]
    -#     real [( [kind=] kind_val )]
    -#     double precision                  (no kind is allowed)
    -#     complex [( [kind=] kind_val )]
    -#     character [( char_stuff )]
    -#     logical [( [kind=] kind_val )]
    -#     type (type_name)
    -#
    -# integer*number, real*number, complex*number, and logical*number are also
    -# supported as nonstandard Fortran extensions for kind specification.
    -# "number" can either be a direct integer or an expression in parentheses.
    -# 
    -# char_stuff is empty or (stuff), where stuff is one of:
    -#     len_val [, [kind=] kind_val]
    -#     kind=kind_val [, [len=] len_val]
    -#     len=len_val [, kind=kind_val]
    -# kind_val and len_val are expressions; len_val can also be just `*'.
    -# 
    -# The length can also be specified using the nonstandard Fortran extension
    -# character*number.  If number is `*', it must be in parentheses (indeed,
    -# any expression other than a number must be in parentheses).
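     -#
     -# Illustrative examples (not from the original source):
     -#     parse_type ("integer(kind=4)")  -- integer with an explicit kind of 4
     -#     parse_type ("character*80")     -- character with a length of 80
     -#     parse_type ("type(box)")        -- derived type named box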
    -#####
    -sub parse_type {
    -  my ($str) = @_;
    -
    -  # print "Parsing type: $str\n";
    -
    -  $str = utils::trim ($str);
    -  $str =~ /^(integer|real|double\s*precision|complex|character|logical|type)
    -    \s* (?: \( (.*) \) | \* \s* (\d+ | \(.*\)) )?$/ix
    -    or die "Invalid type `$str'";
    -  my $base = lc $1;
    -
    -  if ($base =~ /^double\s*precision$/) {
    -    die "double precision cannot have kind specification"
    -      if defined $2 || defined $3;
    -    return $typing::double_precision;
    -  }
    -
    -  if (defined $2 || defined $3) {
    -    my $star = defined $3;
    -    my $args = utils::trim ($star ? $3 : $2);
    -
    -    if ($base eq 'type') {
    -      die "type$args invalid--use type($args)" if $star;
    -      die "type(w) for non-word w" unless $args =~ /^\w+$/;
    -      return typing::make_type ($base, $args);
    -    } elsif ($base eq 'character') {
    -      my ($kind, $len, $rest);
    -      if ($star) {
    -        if ($args =~ /^\(\s*\*\s*\)$/) {
    -          $len = '*';
    -        } else {
    -          $len = expr_parse::parse_expr ($args);
    -        }
    -      } elsif ($args =~ /^kind\s*=\s*/i) {
    -        $args = substr ($args, length ($&));
    -        ($kind, $rest) = expr_parse::parse_part_as_expr ($args);
    -        if (defined $rest) {
    -          $rest = utils::trim ($rest);
    -          $rest =~ s/^len\s*=\s*//i;
    -          $len = ($rest eq '*' ? '*' : expr_parse::parse_expr ($rest));
    -        }
    -      } elsif ($args =~ /^len\s*=\s*/i) {
    -        $args = substr ($args, length ($&));
    -        if (substr ($args, 0, 1) eq '*') {
    -          $len = '*';
    -          $rest = $args;
    -          $rest =~ s/^\*\s*,// or $rest = undef;
    -        } else {
    -          ($len, $rest) = expr_parse::parse_part_as_expr ($args);
    -        }
    -        if (defined $rest) {
    -          $rest = utils::trim ($rest);
    -          $rest =~ /^kind\s*=\s*/
    -            or die "kind= specifier needed when len= specifier is given";
    -          $rest = substr ($rest, length ($&));
    -          $kind = expr_parse::parse_expr ($rest);
    -        }
    -      } else {  # len
    -        if (substr ($args, 0, 1) eq '*') {
    -          $len = "*";
    -          $rest = $args;
    -          $rest =~ s/^\*\s*,// or $rest = undef;
    -        } else {
    -          ($len, $rest) = expr_parse::parse_part_as_expr ($args);
    -        }
    -        if (defined $rest) {
    -          $rest = utils::trim ($rest);
    -          $rest = substr ($rest, length ($&)) if $rest =~ /^kind\s*=\s*/i;
    -          $kind = expr_parse::parse_expr ($rest);
    -        }
    -      }
    -      return typing::make_character_type ($kind, $len);
    -    } else {
    -      $args =~ s/^kind\s*=\s*//i unless $star;
    -      return typing::make_type ($base, expr_parse::parse_expr ($args));
    -    }
    -  } else {
    -    die "type without (type-name) after it" if $base eq 'type';
    -    die "No default type for `$base'"
    -      unless exists $typing::default_type{$base};
    -    return $typing::default_type{$base};
    -  }
    -}
    -
    -sub do_attrib {
    -    my ($name, $attrib, $val, $attribname) = @_;
    -    my ($struct);
    -    foreach $struct (values %{$stmts::topnest->{'contains'}->{lc $name}}) {
    -        die "Redefining $attribname of $struct->{'type'} $name from " .
    -            "$struct->{$attrib} to $val" if exists $struct->{$attrib};
    -        $struct->{$attrib} = $val;
    -    }
    -}
    -
    -1;
    diff --git a/Tools/F_scripts/f90doc/typing.pl b/Tools/F_scripts/f90doc/typing.pl
    deleted file mode 100644
    index 9347b8bbb16..00000000000
    --- a/Tools/F_scripts/f90doc/typing.pl
    +++ /dev/null
    @@ -1,516 +0,0 @@
    -package typing;
    -
    -use strict;
    -
    -# Stores the type of each variable.
    -$typing::typeof = "";
    -# Stack: one typeof per scope.
    -@typing::typeofs = ();
    -
    -# Stores the definition of each type.
    -$typing::typedef = "";
    -# Stack: one typedef per scope.
    -@typing::typedefs = ();
    -
    -# Stores the definition of each function/operator.
    -$typing::code = "";
    -# Stack: one code per scope.
    -@typing::codes = ();
    -
    -
    -# DOUBLE PRECISION type.
    -$typing::double_precision = typing::make_type ('real', 8, "double precision");
    -
    -# Default character kind.
    -$typing::default_character_kind = 1;
    -
    -# Default types.
    -%typing::default_type = (
    -  'complex' => typing::make_type ('complex', 8, "complex"),
    -  'integer' => typing::make_type ('integer', 4, "integer"),
    -  'logical' => typing::make_type ('logical', 1, "logical"),
    -  'real'    => typing::make_type ('real', 4, "real"),
    -);
    -$typing::default_type{'character'} = typing::make_character_type ();
    -
    -# Types with wild sub and any other info (just a base defined).
    -$typing::wild_type = {
    -   'complex'   => typing::make_type ('complex'),
    -   'real'      => typing::make_type ('real'),
    -   'integer'   => typing::make_type ('integer'),
    -   'logical'   => typing::make_type ('logical'),
    -   'character' => typing::make_type ('character')
    -};
    -
    -
    -# Precedence of operations; based on that which is in expr_parse.y.
    -# Higher precedence indicated by larger number.
    -$typing::precedence = {
    -  '.eqv.'  => 1,
    -  '.neqv.' => 1,
    -  '.or.'   => 2,
    -  '.and.'  => 3,
    -  '.not.'  => 4,
    -  '<'      => 5,
    -  '>'      => 5,
    -  '<='     => 5,
    -  '>='     => 5,
    -  '=='     => 5,
    -  '/='     => 5,
    -  '//'     => 6,
    -  '+'      => 7,
    -  '-'      => 7,
    -  'u+'     => 8,
    -  'u-'     => 8,
    -  '*'      => 9,
    -  '/'      => 9,
    -  '**'     => 10,
    -  '%'      => 11,
    -  '%call'  => 11,
    -  '%colon' => 30, # this is a guess
    -  '%namedarg' => 30, # this is a guess
    -  '%array' => 40,    # as in "forty days and forty nights," which means
    -  '%const' => 40,    #    "a long time," here we use 40 as an approx. to infty.
    -  '%var'   => 40,
    -  '%do'    => 40,
    -};
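     -# E.g. (illustrative): expr_to_f90 below parenthesizes a child whose
     -# precedence is lower than its parent's, so the tree for (a + b) * c keeps
     -# its parentheses while the one for a + b * c does not.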
    -
    -#####
    -# Starts a new scope.  If this is a top-level scope, initializes the codes
    -# to intrinsics and the like.
    -#####
    -sub new_scope {
    -   my ($newtypeof, $newtypedef, $newcode);
    -
    -   if (@typing::typeofs) {
    -      $typing::typeof = utils::copy_hash ($typing::typeof);
    -      $typing::typedef = utils::copy_hash ($typing::typedef);
    -      $typing::code = utils::copy_hash ($typing::code);
    -   } else {
    -      $typing::typeof = {};
    -      $typing::typedef = {};
    -      $typing::code = {};
    -      $typing::code{"//"} = [ {
    -         'parms' => [ $typing::wild_type{'character'},
    -                      $typing::wild_type{'character'} ],
    -         'return' => $typing::wild_type{'character'}
    -      } ];
    -      my ($int, $real, $logical, $char) = ( $typing::wild_type{'integer'},
    -         $typing::wild_type{'real'}, $typing::wild_type{'logical'},
    -         $typing::wild_type{'character'} );
    -      my ($op);
    -      foreach $op ("+", "-", "*", "/") {
    -         $typing::code->{$op} = [
    -            { 'parms' => [ $int, $int ], 'return' => $int },
    -            { 'parms' => [ $real, $int ], 'return' => $real },
    -            { 'parms' => [ $int, $real ], 'return' => $real },
    -            { 'parms' => [ $real, $real ], 'return' => $real }
    -         ];
    -      }
    -      $typing::code->{"**"} = [
    -         { 'parms' => [ $int, $int ], 'return' => $int },
    -         { 'parms' => [ $real, $int ], 'return' => $real },
    -         { 'parms' => [ $int, $real ], 'return' => $real },
    -         { 'parms' => [ $real, $real ], 'return' => $real },
    -      ];
    -      foreach $op ("u+", "u-") {
    -         $typing::code->{$op} = [
    -            { 'parms' => [ $int ], 'return' => $int },
    -            { 'parms' => [ $real ], 'return' => $real }
    -         ];
    -      }
    -      foreach $op ("<", "<=", "==", "/=", ">", ">=") {
    -         $typing::code->{$op} = [
    -            { 'parms' => [ $int, $int ], 'return' => $logical },
    -            { 'parms' => [ $real, $int ], 'return' => $logical },
    -            { 'parms' => [ $int, $real ], 'return' => $logical },
    -            { 'parms' => [ $real, $real ], 'return' => $logical },
    -            { 'parms' => [ $char, $char ], 'return' => $logical }
    -         ];
    -      }
    -      foreach $op (".or.", ".and.", ".eqv.", ".neqv.") {
    -         $typing::code->{$op} = [
    -            { 'parms' => [ $logical, $logical ], 'return' => $logical }
    -         ];
    -      }
    -      $typing::code->{".not."} = [
    -         { 'parms' => [ $logical ], 'return' => $logical }
    -      ];
    -      $typing::code->{"//"} = [
    -         { 'parms' => [ $char, $char ], 'return' => $char }
    -      ];
    -   }
    -
    -   push @typing::typeofs, $typing::typeof;
    -   push @typing::typedefs, $typing::typedef;
    -   push @typing::codes, $typing::code;
    -}
    -
    -#####
    -# Ends an old scope.
    -#####
    -sub end_scope {
    -   pop @typing::typeofs;
    -   pop @typing::typedefs;
    -   pop @typing::codes;
    -
    -   if ($typing::typeofs) {
    -      $typing::typeof = $typing::typeofs[$#typing::typeofs];
    -      $typing::typedef = $typing::typedefs[$#typing::typedefs];
    -      $typing::code = $typing::codes[$#typing::codes];
    -   }
    -}
    -
    -#####
    -# Creates a new type with specified base and sub.
    -# Note that sub corresponds to kind for built-in types.
    -# sub can be left out for a wild type.
    -# A third argument, print, can specify how the type should print.  Used for
    -# default types, double precision, etc.
    -#####
    -sub make_type {
    -  my ($base, $sub, $print) = @_;
    -  my $type = { 'base' => $base };
    -  $type->{'sub'} = $sub if $sub;
    -  $type->{'print'} = $print;
    -  return $type;
    -}
    -
    -#####
    -# Creates a new complex type with specified types of "sides."
    -#####
    -sub make_complex_type {
    -  my ($type1, $type2) = @_;
    -  my ($base1, $base2) = ($type1->{'base'}, $type2->{'base'});
    -  die "Complex constant must have real and/or integer parts, but I found types $base1 and $base2"
    -    unless ($base1 eq 'integer' || $base1 eq 'real') &&
    -           ($base2 eq 'integer' || $base2 eq 'real');
    -  my $which;
    -  # From Metcalf and Reed's Fortran 90 Explained, if one of the types is an
    -  # integer then the kind of the complex is the kind of the other type.
    -  if ($base1 eq 'integer') {
    -    $which = $type2;
    -  } elsif ($base2 eq 'integer') {
    -    $which = $type1;
    -  } else {
    -    if ($type1->{'sub'} > $type2->{'sub'}) {
    -      $which = $type1;
    -    } else {
    -      $which = $type2;
    -    }
    -  }
    -  return {
    -    'base'    => 'complex',
    -    'sub'     => $which
    -  };
    -}
    -
    -#####
    -# Creates a new character type with specified sub (kind) and len.
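     -# E.g. (illustrative): make_character_type (undef, "*") yields a
     -# character(len=*) type with the default kind.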
    -#####
    -sub make_character_type {
    -  my ($sub, $len) = @_;
    -  $sub = $typing::default_character_kind unless defined $sub;
    -  $sub = [ "%const", $typing::default_type{'integer'}, $sub ] unless ref $sub;
    -  $len = "1" unless defined $len;
    -  $len = [ "%const", $typing::default_type{'integer'}, $len ]
    -    unless ref $len || $len eq "*";
    -  return {
    -    'base' => 'character',
    -    'sub'  => $sub,
    -    'len'  => $len
    -  };
    -}
    -
    -#####
    -# Returns true iff the given type was created to be the default of its kind.
    -# This has no meaning for compound types (hence it returns false).  For
    -# characters, there's a slight bug in that it will say that the type was
    -# created default even if you specify the default explicitly.  No biggie.
    -# Note that the defaultness is only for the KIND, not the LENGTH.
    -# 
    -# I could fix the above-mentioned problem by storing a 'default' entry just for
    -# the default types.  Then is_default_kind just translates to an exists test.
    -# This is much simpler and avoids the weird checks for double precision numbers
    -# (0.0d0 ==> don't show a kind.  This is really "default").  This would be
    -# kinda nice but 'default' is probably the wrong word.
    -#####
    -sub is_default_kind {
    -   my ($type) = @_;
    -
    -   if ($type->{'base'} eq "character") {
    -     my ($top, @rest) = @{$type->{'sub'}};
    -     return ($top eq "%const" && $rest[0] eq $typing::default_type{'integer'}
    -          && $rest[1] == $typing::default_character_kind);
    -   } else {
    -      return (exists $typing::default_type{$type->{'base'}} && $typing::default_type{$type->{'base'}} eq $type);
    -   }
    -}
    -
    -#####
    -# Converts the given type to a string, written in Fortran 90 code.
    -# Only displays the kind if it was specified explicitly.  Slight bug:
    -# if you say character (kind=1) :: c, then it will print character :: c.
    -# (This is only for characters with default kind.  For other types with
    -# default kind explicitly specified, it is printed.)
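     -# Illustrative examples: the default integer prints as "integer", while an
     -# integer with an explicit kind of 8 prints as "integer (kind=8)".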
    -#####
    -sub type_to_f90 {
    -  my ($type) = @_;
    -
    -  # This covers the case where the kind is the default, except for characters.
    -  return $type->{'print'} if defined $type->{'print'};
    -
    -  my $mods = "";
    -  if ($type->{'base'} eq "character") {
    -    if ($type->{'len'} eq "*") {
    -      $mods = "len=*";
    -    } elsif ($type->{'len'}->[0] ne "%const" ||
    -             $type->{'len'}->[1] != $typing::default_type{'integer'} ||
    -             $type->{'len'}->[2] ne "1") {
    -      $mods = "len=" . expr_to_f90 ($type->{'len'});
    -    }
    -    unless (is_default_kind ($type)) {
    -      $mods .= ", " unless $mods eq '';
    -      $mods .= "kind=" . expr_to_f90 ($type->{'sub'});
    -    }
    -  } elsif ($type->{'base'} eq "type") {
    -    $mods = "$type->{'sub'}";
    -  } else {
    -    $mods = "kind=" . expr_to_f90 ($type->{'sub'});
    -  }
    -  $mods = " ($mods)" unless $mods eq '';
    -  return $type->{'base'} . $mods;
    -}
    -
    -#####
    -# Converts an expression right back to a string, doing "no" conversion (i.e.,
    -# output is in Fortran 90).  Optionally returns the precedence of the outmost
    -# operation in the expression (see $typing::precedence).
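     -# E.g. (illustrative): the tree ["+", ["%var", "a"], ["%var", "b"]] renders
     -# as "a + b".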
    -#####
    -sub expr_to_f90 {
    -  my ($exprptr) = @_;
    -  my ($op, @children) = @$exprptr;
    -
    -  die "Unrecognized operation $op",%$op," (has no precedence?)"
    -    unless exists $typing::precedence->{$op};
    -  my $prec = $typing::precedence->{$op};
    -
    -  my $answer;
    -  if ($op eq "%") {
    -    my ($struct, $elem) = @children;
    -    my ($s, $sprec) = expr_to_f90 ($struct);
    -    $s = "($s)" if $prec > $sprec;
    -    $answer = "$s%$elem";
    -  } elsif ($op eq "%var") {
    -    $answer = $children[0];
    -  } elsif ($op eq "%const") {
    -    my ($type, $val) = @children;
    -    if ($type->{'base'} eq 'complex') {
    -      if (!is_default_kind ($type->{'sub'})) {
    -        my ($k1, $k2) = ("", "");
    -        $k1 = "_$type->{'sub'}->{'sub'}" unless $val->[0] =~ /D[+-]?\d+$/i;
    -        $k2 = "_$type->{'sub'}->{'sub'}" unless $val->[1] =~ /D[+-]?\d+$/i;
    -        $answer = "($val->[0]$k1, $val->[1]$k2)";
    -      } else {
    -        $answer = "($val->[0], $val->[1])";
    -      }
    -    } elsif (is_default_kind ($type) || $val =~ /D[+-]?\d+$/i) {
    -      $answer = $val;
    -    } else {
    -      $answer = "${val}_$type->{'sub'}";
    -    }
    -  } elsif ($op eq "%array") {
    -    $answer = "(/ " . join (", ", map { (expr_to_f90 ($_))[0] } @children)
    -            . " /)";
    -  } elsif ($op eq "%colon") {
    -    my ($left, $right) = @children;
    -    $left = (expr_to_f90 ($left))[0] if $left ne '';
    -    $right = (expr_to_f90 ($right))[0] if $right ne '';
    -    $answer = $left . ":" . $right;  # : has ultimately low precedence
    -  } elsif ($op eq "%namedarg") {
    -    my ($left, $right) = @children;
    -    $answer = $left . " = " .
    -              (expr_to_f90 ($right))[0];  # = has ultimately low precedence
    -  } elsif ($op eq "%do") {
    -    my ($child, $var, @args) = @children;
    -    $answer = "(" . expr_to_f90 ($child) . ", " . $var . " = " .
    -              join (", ", map { (expr_to_f90 ($_))[0] } @args) . ")";
    -  } elsif ($op eq "%call") {
    -    ($op, @children) = @children;
    -    my ($s, $sprec) = expr_to_f90 ($op);
    -    $s = "($s)" if $prec > $sprec;
    -    $answer = "$s (" . join (", ", map ((expr_to_f90 ($_))[0], @children))
    -      . ")";
    -  } elsif (scalar @children == 1) {
    -    $op = substr ($op, 1) if substr ($op, 0, 1) eq 'u';
    -    my ($s, $sprec) = expr_to_f90 ($children[0]);
    -    $s = "($s)" if $prec > $sprec;
    -    $answer = "$op$s";
    -  } elsif (scalar @children == 2) {
    -    my ($s1, $sprec1) = expr_to_f90 ($children[0]);
    -    $s1 = "($s1)" if $prec > $sprec1;
    -    my ($s2, $sprec2) = expr_to_f90 ($children[1]);
    -    $s2 = "($s2)" if $prec > $sprec2;
    -    $answer = "$s1 $op $s2";
    -  } else {
    -    die "expr_to_f90: Unrecognized operation $op with " . (scalar @children) .
    -      " children";
    -  }
    -
    -  if (wantarray) {
    -    return ($answer, $prec);
    -  } else {
    -    return $answer;
    -  }
    -}
    -
    -#####
    -# Computes the type of the given expression (which is passed by reference).
    -# Returns a reference to the actual type.
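     -# E.g. (illustrative): a "%const" node such as
     -#     ["%const", $typing::default_type{'integer'}, "1"]
     -# simply yields the type stored in the node (here the default integer).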
    -#####
    -sub expr_type {
    -   my ($exprptr) = @_;
    -   my ($op, @children) = @$exprptr;
    -
    -   if ($op eq "%") {
    -      my ($struct, $elem) = @children;
    -      my ($type) = expr_type ($struct);
    -      die "expr_type: \%$elem failed: left part is not a compound type" unless $type->{'base'} eq "type";
    -      my ($typedef) = $typing::typedef->{$type->{'sub'}};
    -      my ($elemtype) = $typedef->{$elem};
    -      die "expr_type: \%$elem failed: left part does not include $elem" unless $elemtype;
    -      return $elemtype;
    -   } elsif ($op eq "%var") {
    -      my ($var) = @children;
    -      my ($vartype) = $typing::typeof->{$var};
    -      die "expr_type: Variable $var undefined" unless $vartype;
    -      return $vartype;
    -   } elsif ($op eq "%const") {
    -      my ($type, $val) = @children;
    -      return $type;
    -   } elsif ($op eq "%array") {
    -      # HERE
    -   } elsif ($op eq "%colon") {
    -      my ($string, $left, $right) = @children;
    -      my ($stringtype) = expr_type ($string);
    -      die "expr_type: colon notation for non-character string" if $stringtype->{'base'} ne "character";
    -      die "expr_type: colon notation for character array" if $stringtype->{'dimension'};
    -      return typing::make_character_type ($stringtype->{'sub'}, "*");
    -   } elsif ($op eq "%call") {
    -      ($op, @children) = @children;
    -      my ($subop, @subchildren) = @$op;
    -      if ($subop eq "%var") {
    -         ($op) = @subchildren;
    -         # Fall through: we allow overloaded function name in this special case.
    -      } else {
    -         # Function call without overloading or an array reference.
    -         my ($optype) = expr_type ($op);
    -
    -         if ($optype->{'dimension'}) {  # array reference
    -            return make_type ($optype->{'base'}, $optype->{'sub'});
    -         } else {
    -            die "expr_type: Array/function call for something that is neither" unless $optype->{'base'} eq "interface";
    -            # HERE function call without overloading.
    -         }
    -      }
    -   }
    -
    -   my ($opcodes) = $typing::code->{$op};
    -   die "Operation/function $op undefined" unless $opcodes;
    -   my (@childtypes) = ();
    -   my ($child);
    -   foreach $child (@children) {
    -      print "childtypes was: @childtypes\n";
    -      print "type of $child is ", expr_type ($child), "\n";
    -      push @childtypes, expr_type ($child);
    -      print "childtypes is now: @childtypes\n";
    -   }
    -   my ($opcode);
    -   foreach $opcode (@$opcodes) {
    -      print "children: @children\n";
    -      print "childtypes: @childtypes\n";
    -      if (typing::subtypes_list (\@childtypes, $opcode->{'parms'})) {
    -         my ($parm);
    -         my ($ret) = $opcode->{'return'};
    -         if ($ret->{'base'} eq "character" && ! $ret->{'len'}) {
    -            $ret->{'len'} = 0;
    -find_len:
    -            foreach $parm (@$opcode->{'parms'}) {
    -               if ($parm->{'base'} eq $ret->{'base'}) {
    -                  if ($parm->{'len'} eq "*") {
    -                     $ret->{'len'} = "*";
    -                     last find_len;
    -                  } else {
    -                     $ret->{'len'} += $parm->{'len'};
    -                  }
    -               }
    -            }
    -         }
    -         if ($ret->{'sub'}) {
    -            return $ret;
    -         } else {
    -            # Make intrinsic type's kind: look for all parameters with the same
    -            # base type, and use the maximum kind out of those.
    -            my ($maxkind) = -1;
    -            foreach $parm (@$opcode->{'parms'}) {
    -               if ($parm->{'base'} eq $ret->{'base'}) {
    -                  $maxkind = $parm->{'sub'} if $maxkind < $parm->{'sub'};
    -               }
    -            }
    -            die "expr_type: Internal error caused by new_scope" if $maxkind < 0;
    -            return { %$ret, 'sub' => $maxkind };
    -         }
    -      }
    -   }
    -   die "Operation/function $op defined but not for this (these) type(s)";
    -}
    -
    -#####
    -# Returns if first type is a subtype of the second type.
    -# This currently only supports intrinsic types (integer*4 subtypes integer*?).
    -#####
    -sub subtypes {
    -   my ($t1, $t2) = @_;
    -   return 0 if $t1->{'base'} ne $t2->{'base'};
    -   if ($t1->{'base'} eq "type") {
    -      return 0 if $t1->{'sub'} eq $t2->{'sub'};
    -   } else {
    -      if ($t1->{'base'} eq "character") {
    -         if ($t1->{'len'}) {
    -            return 0 unless $t1->{'len'};
    -            return 0 if $t2->{'len'} != $t1->{'len'};
    -         }
    -      }
    -      if ($t1->{'base'} eq "interface") {
    -         # HERE fill this in when I do function types ("interface").
    -      }
    -      if ($t1->{'sub'}) {
    -         return 0 unless $t1->{'sub'};
    -         return 0 if $t2->{'sub'} ne $t1->{'sub'};
    -      }
    -   }
    -   return 1;
    -}
    -
    -#####
    -# Returns if first type is a subtype of the second type, where the first
    -# and second type are (conceptually) tuples.  That is, the lengths must be
    -# equal, and each element must subtype the corresponding element.
    -# The lists are passed as references.
    -#####
    -sub subtypes_list {
    -   my ($l1ptr, $l2ptr) = @_;
    -   my (@l1) = @$l1ptr;
    -   my (@l2) = @$l2ptr;
    -   return 0 if $#l1 != $#l2;
    -
    -   print "l1 is: @l1\n";
    -   print "l2 is: @l2\n";
    -
    -   my ($i);
    -   for ($i = 0; $i <= $#l1; $i++) {
    -      print "calling subtypes with $l1[$i] and $l2[$i]\n";
    -      return 0 unless typing::subtypes ($l1[$i], $l2[$i]);
    -   }
    -   return 1;
    -}
    diff --git a/Tools/F_scripts/f90doc/utils.pl b/Tools/F_scripts/f90doc/utils.pl
    deleted file mode 100644
    index 8e409f0db1c..00000000000
    --- a/Tools/F_scripts/f90doc/utils.pl
    +++ /dev/null
    @@ -1,87 +0,0 @@
    -package utils;
    -
    -use strict;
    -
    -sub copy_list {
    -   my ($listref) = @_;
    -   my @list;
    -   @list = @$listref;
    -   \@list;
    -}
    -
    -sub copy_hash {
    -   my ($hashref) = @_;
    -   my %hash;
    -   %hash = %$hashref;
    -   \%hash;
    -}
    -
    -sub hash2str {
    -   my ($hash) = @_;
    -   my ($key, $s);
    -   $s = "{\n";
    -   foreach $key (keys %$hash) {
    -      $s .= "   $key => $hash->{$key}\n";
    -   }
    -   $s .= "}";
    -}
    -
    -sub trim {
    -   my ($s) = @_;
    -   $s =~ s/^\s*//;
    -   $s =~ s/\s*$//;
    -   $s;
    -}
    -
    -# balsplit (sep, string) splits string into pieces divided by sep when
    -# sep is "outside" ()s.  Returns a list just like split.
    -sub balsplit {
    -   my ($sep, $str) = @_;
    -   my ($i, $c);
    -   my ($len, $level, $left) = (length ($str), 0, 0);
    -   my (@list) = ();
    -
    -   for ($i = 0; $i < $len; $i++) {
    -      $c = substr ($str, $i, 1);
    -      if ($c eq "(") {
    -         $level++;
    -      } elsif ($c eq ")") {
    -         $level--;
    -         die "balsplit: Unbalanced parens (too many )'s)" if $level < 0;
    -      } elsif ($c eq $sep && $level == 0) {
    -         push (@list, substr ($str, $left, $i-$left));
    -         $left = $i + 1;
    -      }
    -   }
    -
    -   push (@list, substr ($str, $left));
    -   return @list;
    -}
    -
    -# Takes the first word of each element of the list.
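     -# E.g. (illustrative): leftword (["foo bar", "baz qux"]) returns ("foo", "baz").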
    -sub leftword {
    -   my ($listref) = @_;
    -   my @out = ();
    -   my ($x);
    -   foreach $x (@$listref) {
    -      $x =~ s/^\s*//;
    -      $x =~ /^\w*/;
    -      push (@out, $&);
    -   }
    -   @out;
    -}
    -
    -sub remove_blanks {
    -   my ($listref) = @_;
    -   my @out = ();
    -   my ($x);
    -   foreach $x (@$listref) {
    -      push (@out, $x) unless $x =~ /^\s*$/;
    -   }
    -   @out;
    -}
    -
    -sub do_nothing {
    -}
    -
    -1;
    diff --git a/Tools/F_scripts/fcheck.py b/Tools/F_scripts/fcheck.py
    index 20033f85ac9..f5be4efd726 100755
    --- a/Tools/F_scripts/fcheck.py
    +++ b/Tools/F_scripts/fcheck.py
    @@ -1,4 +1,4 @@
    -#!/usr/bin/env python
    +#!/usr/bin/env python3
     
     # a simple routine to parse Fortran files and make sure that things are
     # declared double precision, and constants are of the form 1.0_dp_t or
    @@ -122,9 +122,3 @@ def visit(argFiles, dirname, files):
     
             if (badFile == 1):
                 print " "
    -
    -
    -
    -
    -
    -
    diff --git a/Tools/F_scripts/find_files_vpath.py b/Tools/F_scripts/find_files_vpath.py
    index c9dd5485930..a52d0f28f3d 100755
    --- a/Tools/F_scripts/find_files_vpath.py
    +++ b/Tools/F_scripts/find_files_vpath.py
    @@ -1,12 +1,10 @@
    -#!/usr/bin/env python
    +#!/usr/bin/env python3
     
     """
      Take a vpath and a list of files, and find where in the vpath the
      first occurrence of each file appears.
     """
     
    -from __future__ import print_function
    -
     import sys
     import os
     import argparse
    diff --git a/Tools/F_scripts/findparams.py b/Tools/F_scripts/findparams.py
    index 70280b134de..79d698ade8d 100755
    --- a/Tools/F_scripts/findparams.py
    +++ b/Tools/F_scripts/findparams.py
    @@ -1,6 +1,4 @@
    -#!/usr/bin/env python
    -
    -from __future__ import print_function
    +#!/usr/bin/env python3
     
     import sys
     import os
    diff --git a/Tools/F_scripts/makebuildinfo.py b/Tools/F_scripts/makebuildinfo.py
    index e5f206339b2..4d08a571145 100755
    --- a/Tools/F_scripts/makebuildinfo.py
    +++ b/Tools/F_scripts/makebuildinfo.py
    @@ -1,4 +1,4 @@
    -#!/usr/bin/env python
    +#!/usr/bin/env python3
     
     # a simple script that writes the build_info.f90 file that is used
     # to store information for the job_info file that we store in plotfiles.
    diff --git a/Tools/F_scripts/write_probin.py b/Tools/F_scripts/write_probin.py
    index 10ec4489066..54729eb5f5e 100755
    --- a/Tools/F_scripts/write_probin.py
    +++ b/Tools/F_scripts/write_probin.py
    @@ -1,4 +1,4 @@
    -#!/usr/bin/env python
    +#!/usr/bin/env python3
     
     """This routine parses plain-text parameter files that list runtime
     parameters for use in our codes.  The general format of a parameter
    @@ -24,8 +24,6 @@
     
     """
     
    -from __future__ import print_function
    -
     import os
     import sys
     import argparse
    diff --git a/Tools/GNUMake/Make.defs b/Tools/GNUMake/Make.defs
    index db1ce350e54..f3f712816a6 100644
    --- a/Tools/GNUMake/Make.defs
    +++ b/Tools/GNUMake/Make.defs
    @@ -1,8 +1,3 @@
    -# Check python version
    -my_python_version := $(word 2, $(shell python --version 2>&1))
    -ifneq ($(firstword $(sort 2.7 $(my_python_version))), 2.7)
    -  $(error Python >= 2.7 required! Your version is $(my_python_version))
    -endif
     
     ifneq (,$(findstring ~,$(AMREX_HOME)))
        $(warning *** AMREX_HOME string contains ~ and make will not like it. So it is replaced.)
    @@ -762,6 +757,7 @@ else ifeq ($(USE_CUDA),TRUE)
             LINK_WITH_FORTRAN_COMPILER=TRUE
         endif
     
    +    $(info Loading $(AMREX_HOME)/Tools/GNUMake/comps/nvcc.mak...)
         include $(AMREX_HOME)/Tools/GNUMake/comps/nvcc.mak
     
         ifeq ($(USE_MPI),TRUE)
    @@ -971,17 +967,17 @@ endif
     F90CACHE =
     
     ifeq ($(TP_PROFILING),VTUNE)
    -  $(into Loading $(AMREX_HOME)/Tools/GNUMake/tools/Make.vtune
     +  $(info Loading $(AMREX_HOME)/Tools/GNUMake/tools/Make.vtune)
       include        $(AMREX_HOME)/Tools/GNUMake/tools/Make.vtune
     endif
     
     ifeq ($(TP_PROFILING),CRAYPAT)
    -  $(into Loading $(AMREX_HOME)/Tools/GNUMake/tools/Make.craypat
     +  $(info Loading $(AMREX_HOME)/Tools/GNUMake/tools/Make.craypat)
       include        $(AMREX_HOME)/Tools/GNUMake/tools/Make.craypat
     endif
     
     ifeq ($(TP_PROFILING),FORGE)
    -  $(into Loading $(AMREX_HOME)/Tools/GNUMake/tools/Make.forge
     +  $(info Loading $(AMREX_HOME)/Tools/GNUMake/tools/Make.forge)
       include        $(AMREX_HOME)/Tools/GNUMake/tools/Make.forge
     endif
     
    diff --git a/Tools/GNUMake/Make.machines b/Tools/GNUMake/Make.machines
    index 6903ba05125..738461965d0 100644
    --- a/Tools/GNUMake/Make.machines
    +++ b/Tools/GNUMake/Make.machines
    @@ -60,11 +60,22 @@ ifdef OLCF_ROCM_ROOT
         which_site := olcf
         which_computer := crusher
       endif
    +
    +  ifeq ($(findstring frontier, $(host_name)), frontier)
    +    which_site := olcf
    +    which_computer := frontier
    +  endif
     endif
     
    -ifeq ($(findstring theta, $(host_name)), theta)
    -  which_site := alcf
    -  which_computer := theta
    +ifeq ($(findstring alcf.anl.gov, $(host_name)),alcf.anl.gov)
    +  ifeq ($(findstring theta, $(host_name)), theta)
    +    which_site := alcf
    +    which_computer := theta
    +  endif
    +  ifeq ($(findstring polaris, $(host_name)), polaris)
    +    which_site := alcf
    +    which_computer := polaris
    +  endif
     endif
     
     ifeq ($(findstring sierra, $(host_name)), sierra)
    diff --git a/Tools/GNUMake/Make.rules b/Tools/GNUMake/Make.rules
    index 8b014678500..48ef6d9d3f8 100644
    --- a/Tools/GNUMake/Make.rules
    +++ b/Tools/GNUMake/Make.rules
    @@ -441,7 +441,7 @@ $(tmpEXETempDir)/%.F.orig: %.F
     # & --> *
     $(tmpEXETempDir)/%-cppd.h: %.H
     	@if [ ! -d $(tmpEXETempDir) ]; then mkdir -p $(tmpEXETempDir); fi
    -	$(SILENT) $(CC) $(CPPFLAGS) -DAMREX_TYPECHECK $(includes) -E -P -x c -std=c99 $< -o $@
    +	$(SILENT) $(CC) $(CPPFLAGS) -DAMREX_TYPECHECK $(includes) -E -P -x c -std=c11 $< -o $@
     	@$(SHELL) -ec 'sed -i -e '\''s/amrex::Real/$(amrex_real)/g'\'' $@ ; \
     	               sed -i -e '\''s/amrex_real/$(amrex_real)/g'\''  $@ ; \
     	               sed -i -e '\''s/amrex_particle_real/$(amrex_particle_real)/g'\''  $@ ; \
    @@ -512,9 +512,14 @@ endif
     # e.g. libraries, simply do "make print-libraries".  This will
     # print out the value.
     print-%:
    -	@echo $* is '$($*)'
    +	@echo $* is "$($*)"
     	@echo '    origin = $(origin $*)'
    -	@echo '     value = $(value  $*)'
    +	@echo '     value = $(subst ','"'"',$(value  $*))'
    +# We need to use subst on the result of $(value) because it contains single
     +# quotes.  The shell command echo does not like things like 'x'$(filter-out)'y',
     +# because what it sees is 'x', $(filter-out), and 'y'.  With the substitution, it
    +# will see 'x', "'", '$(filter-out)', "'", and 'y', with $(filter-out) inside a
    +# pair of single quotes.
     
     .PHONY: help
     help:
    diff --git a/Tools/GNUMake/comps/armclang.mak b/Tools/GNUMake/comps/armclang.mak
    index efe4a718106..d2826cb1134 100644
    --- a/Tools/GNUMake/comps/armclang.mak
    +++ b/Tools/GNUMake/comps/armclang.mak
    @@ -57,18 +57,18 @@ ifeq ($(WARN_ERROR),TRUE)
     endif
     
     # disable some warnings
    -CXXFLAGS += -Wno-pass-failed -Wno-c++17-extensions
    +CXXFLAGS += -Wno-c++17-extensions
     
     ########################################################################
     
     ifdef CXXSTD
       CXXSTD := $(strip $(CXXSTD))
     else
    -  CXXSTD := c++14
    +  CXXSTD := c++17
     endif
     
     CXXFLAGS += -std=$(CXXSTD)
    -CFLAGS   += -std=c99
    +CFLAGS   += -std=c11
     
     FMODULES = -J$(fmoddir) -I $(fmoddir)
     
    diff --git a/Tools/GNUMake/comps/cray.mak b/Tools/GNUMake/comps/cray.mak
    index 85a1133e412..cf484e6ec38 100644
    --- a/Tools/GNUMake/comps/cray.mak
    +++ b/Tools/GNUMake/comps/cray.mak
    @@ -53,10 +53,10 @@ else
         # CCE <= 8. So we adjust some flags to achieve similar optimization. See
         # this page:
         # http://pubs.cray.com/content/S-5212/9.0/cray-compiling-environment-cce-release-overview/cce-900-software-enhancements
    -    CXXFLAGS += -O2 -ffast-math #-fsave-loopmark -fsave-decompile
    -    CFLAGS   += -O2 -ffast-math #-fsave-loopmark -fsave-decompile
    -    FFLAGS   += -O2 -h list=a
    -    F90FLAGS += -O2 -h list=a
    +    CXXFLAGS += -O3 -ffast-math #-fsave-loopmark -fsave-decompile
    +    CFLAGS   += -O3 -ffast-math #-fsave-loopmark -fsave-decompile
    +    FFLAGS   += -O3 -h list=a
    +    F90FLAGS += -O3 -h list=a
       else
         GENERIC_COMP_FLAGS += -h list=a
     
    @@ -73,15 +73,15 @@ endif
     ifdef CXXSTD
       CXXSTD := $(strip $(CXXSTD))
     else
    -  CXXSTD := c++14
    +  CXXSTD := c++17
     endif
     
     ifeq ($(CRAY_IS_CLANG_BASED),TRUE)
       CXXFLAGS += -std=$(CXXSTD)
    -  CFLAGS   += -std=c99
    +  CFLAGS   += -std=c11
     else
       CXXFLAGS += -h std=$(CXXSTD)
    -  CFLAGS   += -h c99
    +  CFLAGS   += -h c11
     endif
     
     F90FLAGS += -N 255 -em
    @@ -119,10 +119,6 @@ else
       endif
     endif
     
    -ifeq ($(CRAY_IS_CLANG_BASED),TRUE)
    -  CXXFLAGS += -Wno-pass-failed -Wno-c++17-extensions
    -endif
    -
     CXXFLAGS += $(GENERIC_COMP_FLAGS)
     CFLAGS   += $(GENERIC_COMP_FLAGS)
     FFLAGS   += $(GENERIC_COMP_FLAGS)
    diff --git a/Tools/GNUMake/comps/dpcpp.mak b/Tools/GNUMake/comps/dpcpp.mak
    index d2f7f72108e..33c05fc0c7a 100644
    --- a/Tools/GNUMake/comps/dpcpp.mak
    +++ b/Tools/GNUMake/comps/dpcpp.mak
    @@ -36,8 +36,6 @@ else
     
     endif
     
    -CXXFLAGS += -Wno-pass-failed # disable this warning
    -
     ifeq ($(WARN_ALL),TRUE)
       warning_flags = -Wall -Wextra -Wno-sign-compare -Wunreachable-code -Wnull-dereference
       warning_flags += -Wfloat-conversion -Wextra-semi
    @@ -71,7 +69,7 @@ else
     endif
     
     CXXFLAGS += -Wno-error=sycl-strict -fsycl
    -CFLAGS   += -std=c99
    +CFLAGS   += -std=c11
     
      ifneq ($(DEBUG),TRUE)  # There is currently a bug where the DEBUG build will crash.
     ifeq ($(DPCPP_AOT),TRUE)
    diff --git a/Tools/GNUMake/comps/gnu.mak b/Tools/GNUMake/comps/gnu.mak
    index 10510f30a8d..2d67d418717 100644
    --- a/Tools/GNUMake/comps/gnu.mak
    +++ b/Tools/GNUMake/comps/gnu.mak
    @@ -38,23 +38,23 @@ ifeq ($(EXPORT_DYNAMIC),TRUE)
       GENERIC_GNU_FLAGS += -rdynamic -fno-omit-frame-pointer
     endif
     
    -gcc_major_ge_5 = $(shell expr $(gcc_major_version) \>= 5)
    -gcc_major_ge_6 = $(shell expr $(gcc_major_version) \>= 6)
    -gcc_major_ge_7 = $(shell expr $(gcc_major_version) \>= 7)
     gcc_major_ge_8 = $(shell expr $(gcc_major_version) \>= 8)
     gcc_major_ge_9 = $(shell expr $(gcc_major_version) \>= 9)
     gcc_major_ge_10 = $(shell expr $(gcc_major_version) \>= 10)
     gcc_major_ge_11 = $(shell expr $(gcc_major_version) \>= 11)
    +gcc_major_ge_12 = $(shell expr $(gcc_major_version) \>= 12)
    +
    +ifneq ($(gcc_major_ge_8),1)
    +  $(error GCC < 8 not supported)
    +endif
     
     ifeq ($(THREAD_SANITIZER),TRUE)
       GENERIC_GNU_FLAGS += -fsanitize=thread
     endif
     ifeq ($(FSANITIZER),TRUE)
       GENERIC_GNU_FLAGS += -fsanitize=address -fsanitize=undefined
    -  ifeq ($(gcc_major_ge_8),1)
    -    GENERIC_GNU_FLAGS += -fsanitize=pointer-compare -fsanitize=pointer-subtract
    -    GENERIC_GNU_FLAGS += -fsanitize=builtin -fsanitize=pointer-overflow
    -  endif
    +  GENERIC_GNU_FLAGS += -fsanitize=pointer-compare -fsanitize=pointer-subtract
    +  GENERIC_GNU_FLAGS += -fsanitize=builtin -fsanitize=pointer-overflow
     endif
     
     ifeq ($(USE_OMP),TRUE)
    @@ -97,7 +97,7 @@ else
     endif
     
     ifeq ($(WARN_ALL),TRUE)
    -  warning_flags = -Wall -Wextra
    +  warning_flags = -Wall -Wextra -Wlogical-op -Wfloat-conversion -Wnull-dereference -Wmisleading-indentation -Wduplicated-cond -Wduplicated-branches
     
       ifeq ($(WARN_SIGN_COMPARE),FALSE)
         warning_flags += -Wno-sign-compare
    @@ -108,27 +108,15 @@ ifeq ($(WARN_ALL),TRUE)
         warning_flags += -Wpedantic
       endif
     
    -  ifeq ($(gcc_major_ge_6),1)
    -    warning_flags += -Wnull-dereference
    -  endif
    -
    -  ifeq ($(gcc_major_ge_5),1)
    -    warning_flags += -Wfloat-conversion
    -  endif
    -
       ifneq ($(WARN_SHADOW),FALSE)
         warning_flags += -Wshadow
       endif
     
    -  ifeq ($(gcc_major_version),7)
    -    warning_flags += -Wno-array-bounds
    -  endif
    -
        ifeq ($(gcc_major_ge_10),1)
         warning_flags += -Wextra-semi
       endif
     
    -  CXXFLAGS += $(warning_flags) -Woverloaded-virtual
    +  CXXFLAGS += $(warning_flags) -Woverloaded-virtual -Wnon-virtual-dtor
       CFLAGS += $(warning_flags)
     endif
     
    @@ -157,21 +145,12 @@ endif
     
     ifdef CXXSTD
       CXXSTD := $(strip $(CXXSTD))
    -  ifeq ($(shell expr $(gcc_major_version) \< 5),1)
    -    ifneq ($(NO_CONFIG_CHECKING),TRUE)
    -      ifeq ($(CXXSTD),c++14)
    -        $(error C++14 support requires GCC 5 or newer.)
    -      endif
    -    endif
    -  endif
       CXXFLAGS += -std=$(CXXSTD)
     else
    -  ifeq ($(gcc_major_version),5)
    -    CXXFLAGS += -std=c++14
    -  endif
    +  CXXFLAGS += -std=c++17
     endif
     
    -CFLAGS   += -std=gnu99
    +CFLAGS   += -std=c11
     
     ########################################################################
     
    diff --git a/Tools/GNUMake/comps/hip.mak b/Tools/GNUMake/comps/hip.mak
    index d94f8f3c66f..6005409f9cc 100644
    --- a/Tools/GNUMake/comps/hip.mak
    +++ b/Tools/GNUMake/comps/hip.mak
    @@ -23,7 +23,7 @@ endif
     
     # Generic flags, always used
     CXXFLAGS = -std=$(CXXSTD) -m64
    -CFLAGS   = -std=c99 -m64
    +CFLAGS   = -std=c11 -m64
     
     FFLAGS   = -ffixed-line-length-none -fno-range-check -fno-second-underscore
     F90FLAGS = -ffree-line-length-none -fno-range-check -fno-second-underscore -fimplicit-none
    @@ -86,8 +86,6 @@ ifeq ($(HIP_COMPILER),clang)
     
       endif
     
    -  CXXFLAGS += -Wno-pass-failed  # disable this warning
    -
       ifeq ($(WARN_ALL),TRUE)
         warning_flags = -Wall -Wextra -Wunreachable-code -Wnull-dereference
         warning_flags += -Wfloat-conversion -Wextra-semi
    @@ -109,7 +107,7 @@ ifeq ($(HIP_COMPILER),clang)
     
       # Generic HIP info
       ROC_PATH=$(realpath $(dir $(HIP_PATH)))
    -  SYSTEM_INCLUDE_LOCATIONS += $(HIP_PATH)/include
    +  SYSTEM_INCLUDE_LOCATIONS += $(ROC_PATH)/include $(HIP_PATH)/include
     
       # rocRand
       SYSTEM_INCLUDE_LOCATIONS += $(ROC_PATH)/include/hiprand $(ROC_PATH)/include/rocrand
    @@ -122,13 +120,12 @@ ifeq ($(HIP_COMPILER),clang)
       # rocThrust - Header only
       # SYSTEM_INCLUDE_LOCATIONS += $(ROC_PATH)/include/rocthrust
     
    -  ifeq ($(USE_ROCTX),TRUE)
       # rocTracer
    -  CXXFLAGS += -DAMREX_USE_ROCTX
    -  HIPCC_FLAGS += -DAMREX_USE_ROCTX
    -  SYSTEM_INCLUDE_LOCATIONS += $(ROC_PATH)/include/roctracer $(ROC_PATH)/include/rocprofiler
    -  LIBRARY_LOCATIONS += $(ROC_PATH)/lib
    -  LIBRARIES += -lroctracer64 -lroctx64
    +  ifeq ($(USE_ROCTX),TRUE)
    +    CXXFLAGS += -DAMREX_USE_ROCTX
    +    HIPCC_FLAGS += -DAMREX_USE_ROCTX
    +    LIBRARY_LOCATIONS += $(ROC_PATH)/lib
    +    LIBRARIES += -Wl,--rpath=$(ROC_PATH)/lib -lroctracer64 -lroctx64
       endif
     
       # hipcc passes a lot of unused arguments to clang
    diff --git a/Tools/GNUMake/comps/intel.mak b/Tools/GNUMake/comps/intel.mak
    index 0c4d6e30b2a..2341192d163 100644
    --- a/Tools/GNUMake/comps/intel.mak
    +++ b/Tools/GNUMake/comps/intel.mak
    @@ -39,21 +39,12 @@ endif
     
     ifdef CXXSTD
       CXXSTD := $(strip $(CXXSTD))
    -  ifneq ($(firstword $(sort 17.0 $(intel_version))), 17.0)
    -    ifeq ($(CXXSTD),c++14)
    -      $(error C++14 support requires Intel icpc 17.0 or newer.)
    -    endif
    -  endif
       CXXFLAGS += -std=$(CXXSTD)
     else
    -  ifeq ($(firstword $(sort 17.0 $(intel_version))), 17.0)
    -    CXXFLAGS += -std=c++14
    -  else
    -    $(error Intel icpc 17.0 or newer is required.)
    -  endif
    +  CXXFLAGS += -std=c++17
     endif
     
    -CFLAGS   += -std=c99
    +CFLAGS   += -std=c11
     
     F90FLAGS += -implicitnone
     
    @@ -64,11 +55,7 @@ FMODULES = -module $(fmoddir) -I$(fmoddir)
     GENERIC_COMP_FLAGS =
     
     ifeq ($(USE_OMP),TRUE)
    -  ifeq ($(firstword $(sort 16.0 $(intel_version))), 16.0) 
    -    GENERIC_COMP_FLAGS += -qopenmp
    -  else
    -    GENERIC_COMP_FLAGS += -openmp
    -  endif
    +  GENERIC_COMP_FLAGS += -qopenmp
     endif
     
     CXXFLAGS += $(GENERIC_COMP_FLAGS) -pthread
    diff --git a/Tools/GNUMake/comps/llvm-flang.mak b/Tools/GNUMake/comps/llvm-flang.mak
    index 58a0a06b64e..c9abdaaaeeb 100644
    --- a/Tools/GNUMake/comps/llvm-flang.mak
    +++ b/Tools/GNUMake/comps/llvm-flang.mak
    @@ -43,11 +43,11 @@ endif
     ifdef CXXSTD
       CXXSTD := $(strip $(CXXSTD))
     else
    -  CXXSTD := c++14
    +  CXXSTD := c++17
     endif
     
     CXXFLAGS += -std=$(CXXSTD)
    -CFLAGS   += -std=c99
    +CFLAGS   += -std=c11
     
     FMODULES = -J$(fmoddir) -I $(fmoddir)
     
    diff --git a/Tools/GNUMake/comps/llvm.mak b/Tools/GNUMake/comps/llvm.mak
    index 2bf710c0d94..ead1d9290c2 100644
    --- a/Tools/GNUMake/comps/llvm.mak
    +++ b/Tools/GNUMake/comps/llvm.mak
    @@ -50,7 +50,7 @@ ifeq ($(WARN_ALL),TRUE)
         warning_flags += -Wshadow
       endif
     
    -  CXXFLAGS += $(warning_flags) -Woverloaded-virtual
    +  CXXFLAGS += $(warning_flags) -Woverloaded-virtual -Wnon-virtual-dtor
       CFLAGS += $(warning_flags)
     endif
     
    @@ -60,18 +60,18 @@ ifeq ($(WARN_ERROR),TRUE)
     endif
     
     # disable some warnings
    -CXXFLAGS += -Wno-pass-failed -Wno-c++17-extensions
    +CXXFLAGS += -Wno-c++17-extensions
     
     ########################################################################
     
     ifdef CXXSTD
       CXXSTD := $(strip $(CXXSTD))
     else
    -  CXXSTD := c++14
    +  CXXSTD := c++17
     endif
     
     CXXFLAGS += -std=$(CXXSTD)
    -CFLAGS   += -std=c99
    +CFLAGS   += -std=c11
     
     FFLAGS   += -ffixed-line-length-none -fno-range-check -fno-second-underscore
     F90FLAGS += -ffree-line-length-none -fno-range-check -fno-second-underscore -fimplicit-none
    diff --git a/Tools/GNUMake/comps/nag.mak b/Tools/GNUMake/comps/nag.mak
    index faaf0db7155..55ec14b0620 100644
    --- a/Tools/GNUMake/comps/nag.mak
    +++ b/Tools/GNUMake/comps/nag.mak
    @@ -52,17 +52,12 @@ endif
     
     ifdef CXXSTD
       CXXSTD := $(strip $(CXXSTD))
    -  ifeq ($(shell expr $(gcc_major_version) \< 5),1)
    -    ifeq ($(CXXSTD),c++14)
    -      $(error C++14 support requires GCC 5 or newer.)
    -    endif
    -  endif
       CXXFLAGS += -std=$(CXXSTD)
     else
    -  CXXFLAGS += -std=c++14
    +  CXXFLAGS += -std=c++17
     endif
     
    -CFLAGS   += -std=gnu99
    +CFLAGS   += -std=c11
     
     FFLAGS   += -mismatch
     F90FLAGS += -mismatch -u
    diff --git a/Tools/GNUMake/comps/nvcc.mak b/Tools/GNUMake/comps/nvcc.mak
    index 9d9bf90ce51..f52dfeb6c86 100644
    --- a/Tools/GNUMake/comps/nvcc.mak
    +++ b/Tools/GNUMake/comps/nvcc.mak
    @@ -10,21 +10,11 @@ else
       nvcc_minor_version := 9
     endif
     
    -# Disallow CUDA toolkit versions < 10
    +# Disallow CUDA toolkit versions < 11
     
    -nvcc_major_lt_10 = $(shell expr $(nvcc_major_version) \< 10)
    -ifeq ($(nvcc_major_lt_10),1)
    -  $(error Your nvcc version is $(nvcc_version). This is unsupported. Please use CUDA toolkit version 10.0 or newer.)
    -endif
    -
    -nvcc_forward_unknowns = 0
    -ifeq ($(shell expr $(nvcc_major_version) \= 10),1)
    -ifeq ($(shell expr $(nvcc_minor_version) \>= 2),1)
    -  nvcc_forward_unknowns = 1
    -endif
    -endif
    -ifeq ($(shell expr $(nvcc_major_version) \>= 11),1)
    -  nvcc_forward_unknowns = 1
    +nvcc_major_lt_11 = $(shell expr $(nvcc_major_version) \< 11)
    +ifeq ($(nvcc_major_lt_11),1)
    +  $(error Your nvcc version is $(nvcc_version). This is unsupported. Please use CUDA toolkit version 11.0 or newer.)
     endif
     
     ifeq ($(shell expr $(nvcc_major_version) \= 11),1)
    @@ -34,24 +24,6 @@ ifeq ($(shell expr $(nvcc_minor_version) \= 0),1)
     endif
     endif
     
    -ifeq ($(shell expr $(nvcc_major_version) \< 11),1)
    -  # -MMD -MP not supported in < 11
    -  USE_LEGACY_DEPFLAGS = TRUE
    -  DEPFLAGS =
    -endif
    -
    -ifeq ($(shell expr $(nvcc_major_version) \< 10),1)
    -  # -MM not supported in < 10
    -  LEGACY_DEPFLAGS = -M
    -endif
    -
    -ifeq ($(shell expr $(nvcc_major_version) \= 10),1)
    -ifeq ($(shell expr $(nvcc_minor_version) \= 0),1)
    -  # -MM not supported in 10.0
    -  LEGACY_DEPFLAGS = -M
    -endif
    -endif
    -
     #
     # nvcc compiler driver does not always accept pgc++
     # as a host compiler at present. However, if we're using
    @@ -72,16 +44,14 @@ endif
     
     ifeq ($(lowercase_nvcc_host_comp),gnu)
     
    -  ifeq ($(shell expr $(gcc_major_version) \< 5),1)
    -    ifneq ($(NO_CONFIG_CHECKING),TRUE)
    -      $(error C++14 support requires GCC 5 or newer.)
    -    endif
    +  ifeq ($(shell expr $(gcc_major_version) \< 8),1)
    +    $(error GCC >= 8 required.)
       endif
     
       ifdef CXXSTD
         CXXSTD := $(strip $(CXXSTD))
       else
    -    CXXSTD = c++14
    +    CXXSTD = c++17
       endif
       CXXFLAGS += -std=$(CXXSTD)
     
    @@ -95,27 +65,22 @@ ifeq ($(lowercase_nvcc_host_comp),gnu)
     else ifeq ($(lowercase_nvcc_host_comp),pgi)
       ifdef CXXSTD
         CXXSTD := $(strip $(CXXSTD))
    -    ifeq ($(shell expr $(gcc_major_version) \< 5),1)
    -      ifeq ($(CXXSTD),c++14)
    -        $(error C++14 support requires GCC 5 or newer.)
    -      endif
    -    endif
       else
    -    CXXSTD := c++14
    +    CXXSTD := c++17
       endif
     
       CXXFLAGS += -std=$(CXXSTD)
     
       NVCC_CCBIN ?= pgc++
     
    -  # In pgi.make, we use gcc_major_version to handle c++14 flag.
     +  # In pgi.make, we use gcc_major_version to handle the c++17 flag.
       CXXFLAGS_FROM_HOST := -ccbin=$(NVCC_CCBIN) -Xcompiler='$(CXXFLAGS)' --std=$(CXXSTD)
       CFLAGS_FROM_HOST := $(CXXFLAGS_FROM_HOST)
     else
       ifdef CXXSTD
         CXXSTD := $(strip $(CXXSTD))
       else
    -    CXXSTD := c++14
    +    CXXSTD := c++17
       endif
     
       NVCC_CCBIN ?= $(CXX)
    @@ -124,7 +89,7 @@ else
       CFLAGS_FROM_HOST := $(CXXFLAGS_FROM_HOST)
     endif
     
    -NVCC_FLAGS = -Wno-deprecated-gpu-targets -m64 -arch=compute_$(CUDA_ARCH) -code=sm_$(CUDA_ARCH) -maxrregcount=$(CUDA_MAXREGCOUNT) --expt-relaxed-constexpr --expt-extended-lambda
    +NVCC_FLAGS = -Wno-deprecated-gpu-targets -m64 -arch=compute_$(CUDA_ARCH) -code=sm_$(CUDA_ARCH) -maxrregcount=$(CUDA_MAXREGCOUNT) --expt-relaxed-constexpr --expt-extended-lambda --forward-unknown-to-host-compiler
     # This is to work around a bug with nvcc, see: https://github.com/kokkos/kokkos/issues/1473
     NVCC_FLAGS += -Xcudafe --diag_suppress=esa_on_defaulted_function_ignored
     
    @@ -154,11 +119,6 @@ endif
     
     NVCC_FLAGS += $(XTRA_NVCC_FLAGS)
     
    -ifeq ($(nvcc_forward_unknowns),1)
    -  NVCC_FLAGS += --forward-unknown-to-host-compiler
    -endif
    -
    -ifeq ($(shell expr $(nvcc_major_version) \>= 11),1)
     ifeq ($(GPU_ERROR_CAPTURE_THIS),TRUE)
       NVCC_FLAGS += --Werror ext-lambda-captures-this
     else
    @@ -166,7 +126,6 @@ ifeq ($(GPU_WARN_CAPTURE_THIS),TRUE)
       NVCC_FLAGS += --Wext-lambda-captures-this
     endif
     endif
    -endif
     
     nvcc_diag_error = 0
     ifeq ($(shell expr $(nvcc_major_version) \>= 12),1)
    diff --git a/Tools/GNUMake/comps/nvhpc.mak b/Tools/GNUMake/comps/nvhpc.mak
    index 49f815213f1..d76e7c9d36e 100644
    --- a/Tools/GNUMake/comps/nvhpc.mak
    +++ b/Tools/GNUMake/comps/nvhpc.mak
    @@ -94,19 +94,15 @@ endif
     # The logic here should be consistent with what's in nvcc.mak
     ifdef CXXSTD
       CXXSTD := $(strip $(CXXSTD))
    -  ifeq ($(shell expr $(gcc_major_version) \< 5),1)
    -    ifeq ($(CXXSTD),c++14)
    -      $(error C++14 support requires GCC 5 or newer.)
    -    endif
    +  ifeq ($(shell expr $(gcc_major_version) \< 8),1)
    +    $(error C++17 support requires GCC 8 or newer.)
       endif
       CXXFLAGS += -std=$(CXXSTD)
     else
    -  ifeq ($(gcc_major_version),5)
    -    CXXFLAGS += -std=c++14
    -  endif
    +  CXXFLAGS += -std=c++17
     endif
     
    -CFLAGS   += -c99
    +CFLAGS   += -c11
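     # (-c11 selects the C11 language standard in the NVHPC C compiler,
     # replacing the earlier -c99.)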
     
     CXXFLAGS += $(GENERIC_NVHPC_FLAGS)
     CFLAGS   += $(GENERIC_NVHPC_FLAGS)
    diff --git a/Tools/GNUMake/comps/pgi.mak b/Tools/GNUMake/comps/pgi.mak
    index 0cf50d77287..d2736c71a33 100644
    --- a/Tools/GNUMake/comps/pgi.mak
    +++ b/Tools/GNUMake/comps/pgi.mak
    @@ -87,20 +87,18 @@ endif
     
     # The logic here should be consistent with what's in nvcc.mak
     
    -ifeq ($(shell expr $(gcc_major_version) \< 5),1)
    -  $(error C++14 support requires GCC 5 or newer.)
    +ifeq ($(shell expr $(gcc_major_version) \< 8),1)
    +  $(error C++17 support requires GCC 8 or newer.)
     endif
     
     ifdef CXXSTD
       CXXSTD := $(strip $(CXXSTD))
       CXXFLAGS += -std=$(CXXSTD)
     else
    -  ifeq ($(gcc_major_version),5)
    -    CXXFLAGS += -std=c++14
    -  endif
    +  CXXFLAGS += -std=c++17
     endif
     
    -CFLAGS   += -c99
    +CFLAGS   += -c11
     
     CXXFLAGS += $(GENERIC_PGI_FLAGS)
     CFLAGS   += $(GENERIC_PGI_FLAGS)
    diff --git a/Tools/GNUMake/packages/Make.hdf5 b/Tools/GNUMake/packages/Make.hdf5
    index d09fe43a082..9d54463ce4e 100644
    --- a/Tools/GNUMake/packages/Make.hdf5
    +++ b/Tools/GNUMake/packages/Make.hdf5
    @@ -27,8 +27,9 @@ ifeq ($(USE_HDF5_ZFP),TRUE)
           ZFP_ABSPATH = $(abspath $(ZFP_HOME))
           H5Z_ABSPATH = $(abspath $(H5Z_HOME))
           INCLUDE_LOCATIONS += $(ZFP_ABSPATH)/include $(H5Z_ABSPATH)/include
    -      LIBRARY_LOCATIONS += $(ZFP_ABSPATH)/lib $(H5Z_ABSPATH)/lib
    +      LIBRARY_LOCATIONS += $(ZFP_ABSPATH)/lib $(ZFP_ABSPATH)/lib64 $(H5Z_ABSPATH)/lib
           LDFLAGS += -Xlinker -rpath -Xlinker $(ZFP_ABSPATH)/lib
    +      LDFLAGS += -Xlinker -rpath -Xlinker $(ZFP_ABSPATH)/lib64
         endif
       endif
     endif
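     # (Some distributions install ZFP's libraries under lib64 rather than
     # lib, so both directories are searched and added to the rpath.)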
    diff --git a/Tools/GNUMake/packages/Make.hypre b/Tools/GNUMake/packages/Make.hypre
    index 11e0690a67e..d2cc0d7c17a 100644
    --- a/Tools/GNUMake/packages/Make.hypre
    +++ b/Tools/GNUMake/packages/Make.hypre
    @@ -19,5 +19,5 @@ ifdef AMREX_HYPRE_HOME
     endif
     
     ifeq ($(USE_CUDA),TRUE)
    -  LIBRARIES += -lcusparse -lcurand
    +  LIBRARIES += -lcusparse -lcurand -lcublas
     endif
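     # (hypre built with CUDA support depends on cuBLAS as well as cuSPARSE
     # and cuRAND.)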
    diff --git a/Tools/GNUMake/sites/Make.alcf b/Tools/GNUMake/sites/Make.alcf
    index 324d419ccce..cf607596515 100644
    --- a/Tools/GNUMake/sites/Make.alcf
    +++ b/Tools/GNUMake/sites/Make.alcf
    @@ -8,3 +8,78 @@ ifeq ($(which_computer),theta)
         LIBRARIES += -lmpichf90
       endif
     endif
    +
    +ifeq ($(which_computer),$(filter $(which_computer),polaris))
    +
    +  ifdef PE_ENV
    +    ifneq ($(USE_GPU),TRUE)
    +      lowercase_peenv := $(shell echo $(PE_ENV) | tr A-Z a-z)
    +      ifneq ($(lowercase_peenv),$(lowercase_comp))
    +        has_compiler_mismatch = COMP=$(COMP) does not match PrgEnv-$(lowercase_peenv)
    +      endif
    +      ifeq ($(MAKECMDGOALS),)
    +        ifeq ($(lowercase_peenv),nvidia)
    +          $(error PrgEnv-nvidia cannot be used with CPU-only builds. Try PrgEnv-gnu instead.)
    +        endif
    +      endif
    +    endif
    +  endif
    +
    +  ifeq ($(USE_CUDA),TRUE)
    +    CFLAGS += -Xcompiler='$(wordlist 2,1024,$(shell cc -craype-verbose 2> /dev/null))'
    +    CXXFLAGS += -Xcompiler='$(wordlist 2,1024,$(shell CC -craype-verbose 2> /dev/null))'
    +  else ifeq ($(USE_MPI),FALSE)
    +    CFLAGS += $(wordlist 2,1024,$(shell cc -craype-verbose 2> /dev/null))
    +    CXXFLAGS += $(wordlist 2,1024,$(shell CC -craype-verbose 2> /dev/null))
    +  endif
    +
    +  ifeq ($(USE_MPI),TRUE)
    +    ifneq ($(USE_CUDA),TRUE)
    +      CC  = cc
    +      CXX = CC
    +      FC  = ftn
    +      F90 = ftn
    +      LIBRARIES += -lmpichf90
    +    endif
    +
    +    includes += $(shell CC --cray-print-opts=cflags)
    +  endif
    +
    +  ifeq ($(USE_CUDA),TRUE)
    +    CUDA_ARCH = 80
    +
    +    ifeq ($(USE_MPI), FALSE)
    +      includes += $(CRAY_CUDATOOLKIT_INCLUDE_OPTS)
    +    endif
    +
    +    comm := ,
    +    ifneq ($(BL_NO_FORT),TRUE)
    +      LIBRARIES += $(subst -Wl$(comm),-Xlinker=,$(shell ftn --cray-print-opts=libs))
    +    else
    +      LIBRARIES += $(subst -Wl$(comm),-Xlinker=,$(shell CC --cray-print-opts=libs))
    +    endif
    +
    +    ifneq ($(CUDA_ROOT),)
    +        SYSTEM_CUDA_PATH := $(CUDA_ROOT)
    +        COMPILE_CUDA_PATH := $(CUDA_ROOT)
    +    else ifneq ($(CUDA_HOME),)
    +        SYSTEM_CUDA_PATH := $(CUDA_HOME)
    +        COMPILE_CUDA_PATH := $(CUDA_HOME)
    +    else ifneq ($(CUDA_PATH),)
    +        SYSTEM_CUDA_PATH := $(CUDA_PATH)
    +        COMPILE_CUDA_PATH := $(CUDA_PATH)
    +    else ifneq ($(NVIDIA_PATH),)
    +        SYSTEM_CUDA_PATH := $(NVIDIA_PATH)/cuda
    +        COMPILE_CUDA_PATH := $(NVIDIA_PATH)/cuda
    +    else
    +        $(error No CUDA_ROOT, CUDA_HOME, CUDA_PATH, or NVIDIA_PATH found. Please load a cuda module.)
    +    endif
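    +    # Any of these locations can also be set explicitly, e.g. (path is
    +    # illustrative):  make USE_CUDA=TRUE CUDA_ROOT=/opt/cuda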
    +
    +    # Provide system configuration information.
    +
    +    GPUS_PER_NODE=4
    +    GPUS_PER_SOCKET=4
    +
    +  endif
    +
    +endif
    \ No newline at end of file
    diff --git a/Tools/GNUMake/sites/Make.nersc b/Tools/GNUMake/sites/Make.nersc
    index c8c938a627a..426b9525887 100644
    --- a/Tools/GNUMake/sites/Make.nersc
    +++ b/Tools/GNUMake/sites/Make.nersc
    @@ -25,11 +25,16 @@ ifeq ($(which_computer),$(filter $(which_computer),perlmutter))
       endif
     
       ifeq ($(USE_CUDA),TRUE)
    -      CFLAGS += -Xcompiler="$(wordlist 2,1024,$(shell cc -craype-verbose 2> /dev/null))"
    -      CXXFLAGS += -Xcompiler="$(wordlist 2,1024,$(shell CC -craype-verbose 2> /dev/null))"
    +    ifdef NPE_VERSION
    +      CFLAGS += -Xcompiler='$(filter-out -Wl%, $(wordlist 2,1024,$(shell mpicc -show 2> /dev/null)))'
    +      CXXFLAGS += -Xcompiler='$(filter-out -Wl%, $(wordlist 2,1024,$(shell mpicxx -show 2> /dev/null)))'
    +    else
    +      CFLAGS += -Xcompiler='$(wordlist 2,1024,$(shell cc -craype-verbose 2> /dev/null))'
    +      CXXFLAGS += -Xcompiler='$(wordlist 2,1024,$(shell CC -craype-verbose 2> /dev/null))'
    +    endif
       else ifeq ($(USE_MPI),FALSE)
    -      CFLAGS += $(wordlist 2,1024,$(shell cc -craype-verbose 2> /dev/null))
    -      CXXFLAGS += $(wordlist 2,1024,$(shell CC -craype-verbose 2> /dev/null))
    +    CFLAGS += $(wordlist 2,1024,$(shell cc -craype-verbose 2> /dev/null))
    +    CXXFLAGS += $(wordlist 2,1024,$(shell CC -craype-verbose 2> /dev/null))
       endif
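     # When NPE_VERSION is defined, the flags come from the MPICH wrappers
     # (mpicc/mpicxx -show) rather than the Cray wrappers, with -Wl,...
     # linker options filtered out of the compile-time -Xcompiler flags.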
     
       ifeq ($(USE_MPI),TRUE)
    @@ -41,7 +46,9 @@ ifeq ($(which_computer),$(filter $(which_computer),perlmutter))
           LIBRARIES += -lmpichf90
         endif
     
    -    includes += $(shell CC --cray-print-opts=cflags)
    +    ifndef NPE_VERSION
    +      includes += $(shell CC --cray-print-opts=cflags)
    +    endif
       endif
     
       ifeq ($(USE_CUDA),TRUE)
    @@ -51,11 +58,23 @@ ifeq ($(which_computer),$(filter $(which_computer),perlmutter))
           includes += $(CRAY_CUDATOOLKIT_INCLUDE_OPTS)
         endif
     
    +    ifdef NPE_VERSION
    +      includes += $(CRAY_CUDATOOLKIT_INCLUDE_OPTS)
    +    endif
    +
         comm := ,
         ifneq ($(BL_NO_FORT),TRUE)
    +      ifdef NPE_VERSION
    +        LIBRARIES += $(subst -Wl$(comm),-Xlinker=,$(wordlist 2,1024,$(shell mpifort -show)))
    +      else
             LIBRARIES += $(subst -Wl$(comm),-Xlinker=,$(shell ftn --cray-print-opts=libs))
    +      endif
         else
    +      ifdef NPE_VERSION
    +        LIBRARIES += $(subst -Wl$(comm),-Xlinker=,$(wordlist 2,1024,$(shell mpicxx -show)))
    +      else
             LIBRARIES += $(subst -Wl$(comm),-Xlinker=,$(shell CC --cray-print-opts=libs))
    +      endif
         endif
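         # In each branch, -Wl, is rewritten to -Xlinker= so that nvcc
         # forwards the linker options to the host linker unchanged.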
     
         ifneq ($(CUDA_ROOT),)
    diff --git a/Tools/GNUMake/sites/Make.nrel b/Tools/GNUMake/sites/Make.nrel
    index 68ac8e5116f..ca705698ea1 100644
    --- a/Tools/GNUMake/sites/Make.nrel
    +++ b/Tools/GNUMake/sites/Make.nrel
    @@ -40,27 +40,32 @@ else ifeq ($(which_computer), rhodes)
       endif
     endif
     
    -# Account for Intel-MPI, MPICH, OpenMPI, and HPE MPT
     ifeq ($(USE_MPI),TRUE)
    +  CXX := mpicxx
    +  CC  := mpicc
    +  FC  := mpif90
    +  F90 := mpif90
       ifeq ($(COMP), intel)
    -    CXX := mpiicpc
    -    CC  := mpiicc
    -    FC  := mpiifort
    -    F90 := mpiifort
    -  else
    -    CXX := mpicxx
    -    CC  := mpicc
    -    FC  := mpif90
    -    F90 := mpif90
    -    ifneq ($(findstring mpich, $(shell $(F90) -show 2>&1)),)
    -      mpif90_link_flags := $(shell $(F90) -link_info)
    -      LIBRARIES += $(wordlist 2,1024,$(mpif90_link_flags))
    -    else ifneq ($(findstring Open MPI, $(shell $(F90) -showme:version 2>&1)),)
    -      mpif90_link_flags := $(shell $(F90) -showme:link)
    -      LIBRARIES += $(mpif90_link_flags)
    -    else
    -      # MPT case (no option available to query link flags)
    -      LIBRARIES += -lmpi
    +    ifeq ($(which_computer), eagle)
    +        # Always assume MPT on Eagle
    +        export MPICXX_CXX := icpc
    +        export MPICC_CC   := icc
    +        export MPIF90_F90 := ifort
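    +        # (MPT's compiler wrappers honor the MPICC_CC/MPICXX_CXX/
    +        # MPIF90_F90 environment variables when picking the underlying
    +        # compilers.)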
    +    else ifeq ($(which_computer), rhodes)
    +        CXX := mpiicpc
    +        CC  := mpiicc
    +        FC  := mpiifort
    +        F90 := mpiifort
         endif
       endif
    +  ifneq ($(findstring mpich, $(shell $(F90) -show 2>&1)),)
    +    mpif90_link_flags := $(shell $(F90) -link_info)
    +    LIBRARIES += $(wordlist 2,1024,$(mpif90_link_flags))
    +  else ifneq ($(findstring Open MPI, $(shell $(F90) -showme:version 2>&1)),)
    +    mpif90_link_flags := $(shell $(F90) -showme:link)
    +    LIBRARIES += $(mpif90_link_flags)
    +  else
    +    # MPT case (no option available to query link flags)
    +    LIBRARIES += -lmpi
    +  endif
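    +  # The probes above distinguish MPICH (-show / -link_info), Open MPI
    +  # (-showme), and HPE MPT (no query option available; fall back to
    +  # -lmpi).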
     endif
    diff --git a/Tools/GNUMake/sites/Make.olcf b/Tools/GNUMake/sites/Make.olcf
    index 651971c6c95..69f557786df 100644
    --- a/Tools/GNUMake/sites/Make.olcf
    +++ b/Tools/GNUMake/sites/Make.olcf
    @@ -2,7 +2,7 @@
     # For Summit et al. at OLCF
     #
     
    -OLCF_MACHINES := summit ascent spock crusher
    +OLCF_MACHINES := summit ascent spock crusher frontier
     
     ifneq ($(which_computer), $(findstring $(which_computer), $(OLCF_MACHINES)))
       $(error Unknown OLCF computer, $(which_computer))
    @@ -60,7 +60,7 @@ ifeq ($(which_computer),spock)
         endif
         # for gpu aware mpi
         ifeq ($(USE_HIP),TRUE)
    -      LIBRARIES += $(PE_MPICH_GTL_DIR_gfx908) -lmpi_gtl_hsa
    +      LIBRARIES += $(PE_MPICH_GTL_DIR_amd_gfx908) -lmpi_gtl_hsa
         endif
       endif
     endif
    @@ -80,7 +80,27 @@ ifeq ($(which_computer),crusher)
         endif
         # for gpu aware mpi
         ifeq ($(USE_HIP),TRUE)
    -      LIBRARIES += -lmpi_gtl_hsa
    +      LIBRARIES += $(PE_MPICH_GTL_DIR_amd_gfx90a) -lmpi_gtl_hsa
    +    endif
    +  endif
    +endif
    +
    +ifeq ($(which_computer),frontier)
    +  ifeq ($(USE_HIP),TRUE)
    +    # MI250X
    +    AMD_ARCH=gfx90a
    +  endif
    +
    +  ifeq ($(USE_MPI),TRUE)
    +    includes += $(shell CC --cray-print-opts=cflags)
    +    ifneq ($(BL_NO_FORT),TRUE)
    +      LIBRARIES += $(shell ftn --cray-print-opts=libs)
    +    else
    +      LIBRARIES += $(shell CC --cray-print-opts=libs)
    +    endif
    +    # for gpu aware mpi
    +    ifeq ($(USE_HIP),TRUE)
    +      LIBRARIES += $(PE_MPICH_GTL_DIR_amd_gfx90a) -lmpi_gtl_hsa
         endif
       endif
     endif
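     # Usage sketch (hypothetical invocation) for Frontier's MI250X nodes:
     #   make USE_HIP=TRUE USE_MPI=TRUE
     # which selects gfx90a above and links the GTL library for GPU-aware MPI.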
    diff --git a/Tools/GNUMake/sites/Make.unknown b/Tools/GNUMake/sites/Make.unknown
    index 332a7a558de..2ecf6a50ddb 100644
    --- a/Tools/GNUMake/sites/Make.unknown
    +++ b/Tools/GNUMake/sites/Make.unknown
    @@ -29,6 +29,8 @@ ifeq ($(USE_MPI),TRUE)
     
       ifeq ($(LINK_WITH_FORTRAN_COMPILER),TRUE)
         MPI_OTHER_COMP := mpicxx
    +  else ifeq ($(BL_NO_FORT),TRUE)
    +    MPI_OTHER_COMP := mpicxx
       else
         MPI_OTHER_COMP := mpif90
       endif
    @@ -55,7 +57,10 @@ ifeq ($(USE_MPI),TRUE)
          mpi_link_flags := $(filter-out $(mpi_filter), $(mpi_link_flags))
       endif
     
    -  LIBRARIES += $(mpi_link_flags) $(mpicxx_link_libs)
    +  LIBRARIES += $(mpi_link_flags)
    +  ifneq ($(MPI_OTHER_COMP),mpicxx)
    +    LIBRARIES += $(mpicxx_link_libs)
    +  endif
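    +  # ($(mpicxx_link_libs) is skipped when MPI_OTHER_COMP is already
    +  # mpicxx, since $(mpi_link_flags) would then carry the same libraries.)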
     
       # OpenMPI specific flag
       # Uncomment if statement if flag causes issue with another compiler.
    diff --git a/Tools/Plotfile/CMakeLists.txt b/Tools/Plotfile/CMakeLists.txt
    index 44f99d9523c..9f8f066fbbb 100644
    --- a/Tools/Plotfile/CMakeLists.txt
    +++ b/Tools/Plotfile/CMakeLists.txt
    @@ -34,5 +34,5 @@ target_include_directories(fsnapshot PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
     target_sources(fsnapshot PRIVATE AMReX_PPMUtil.H AMReX_PPMUtil.cpp)
     if (AMReX_CUDA)
        set_source_files_properties(AMReX_PPMUtil.cpp PROPERTIES LANGUAGE CUDA)
    -   target_compile_features(fsnapshot PUBLIC cxx_std_14)
    +   target_compile_features(fsnapshot PUBLIC cxx_std_17)
     endif()
    diff --git a/Tools/Postprocessing/python/column_depth.py b/Tools/Postprocessing/python/column_depth.py
    index 3aff2ac4705..be17d6bb663 100755
    --- a/Tools/Postprocessing/python/column_depth.py
    +++ b/Tools/Postprocessing/python/column_depth.py
    @@ -1,4 +1,4 @@
    -#!/usr/bin/env python
    +#!/usr/bin/env python3
     import sys
     import numpy
     
    diff --git a/Tools/Postprocessing/python/conv_slopes.py b/Tools/Postprocessing/python/conv_slopes.py
    index f2fe5404aae..9f1a22e3960 100755
    --- a/Tools/Postprocessing/python/conv_slopes.py
    +++ b/Tools/Postprocessing/python/conv_slopes.py
    @@ -1,4 +1,4 @@
    -#!/usr/bin/env python
    +#!/usr/bin/env python3
     import sys
     import os
    -import commands
    +# The Python 2 "commands" module is gone in Python 3; subprocess
    +# provides the same getoutput/getstatusoutput helpers.
    +import subprocess as commands
    diff --git a/Tools/Postprocessing/python/dumpparthistory.py b/Tools/Postprocessing/python/dumpparthistory.py
    index 092f924423b..23f6d22d1a8 100755
    --- a/Tools/Postprocessing/python/dumpparthistory.py
    +++ b/Tools/Postprocessing/python/dumpparthistory.py
    @@ -1,4 +1,4 @@
    -#!/usr/bin/env python
    +#!/usr/bin/env python3
     
     # a simple routine to parse particle files and dump out the particle
     # histories into separate files (1 file per particle) so that they can
    @@ -96,8 +96,3 @@ def main(files):
             sys.exit(2)
     
         main(sys.argv[1:])
    -
    -
    -
    -
    -
    diff --git a/Tools/Postprocessing/python/test_helmeos.py b/Tools/Postprocessing/python/test_helmeos.py
    index 890a66aef77..824f369cf60 100755
    --- a/Tools/Postprocessing/python/test_helmeos.py
    +++ b/Tools/Postprocessing/python/test_helmeos.py
    @@ -1,4 +1,4 @@
    -#!/usr/bin/env python
    +#!/usr/bin/env python3
     #
     # a script showing how to use the helmeos module
     # it reads T, rho, X data from a sample data file, calculates abar and zbar
    diff --git a/Tools/Postprocessing/python/test_parseparticles.py b/Tools/Postprocessing/python/test_parseparticles.py
    index b9181af4d8a..8a85fe2faf6 100755
    --- a/Tools/Postprocessing/python/test_parseparticles.py
    +++ b/Tools/Postprocessing/python/test_parseparticles.py
    @@ -1,4 +1,4 @@
    -#!/usr/bin/env python
    +#!/usr/bin/env python3
     
     # simple script showing how to make plots of particles using the parseparticles
     # module
    @@ -92,4 +92,3 @@ def main(fileList):
     
     # this is for profiling
     #    cProfile.run("main(sys.argv[1:])","profile.tmp2")
    -
    diff --git a/Tools/Py_util/plotsinglevar.py b/Tools/Py_util/plotsinglevar.py
    index 616c516c805..bb1c2abacaa 100755
    --- a/Tools/Py_util/plotsinglevar.py
    +++ b/Tools/Py_util/plotsinglevar.py
    @@ -1,11 +1,9 @@
    -#!/usr/bin/env python
    +#!/usr/bin/env python3
     
     # a simple script to plot 2-d or 3-d BoxLib data using the matplotlib
     # library
     #
     
    -from __future__ import print_function
    -
     import matplotlib
     matplotlib.use('agg')
     
    diff --git a/Tools/Release/ppCleanup.py b/Tools/Release/ppCleanup.py
    index 109444daff3..2935d0c1983 100755
    --- a/Tools/Release/ppCleanup.py
    +++ b/Tools/Release/ppCleanup.py
    @@ -1,4 +1,4 @@
    -#!/usr/bin/env python
    +#!/usr/bin/env python3
     
     import os
     import shutil
    diff --git a/Tools/Release/ppCleanupDir.py b/Tools/Release/ppCleanupDir.py
    index befebc15f2d..2d8a598291d 100755
    --- a/Tools/Release/ppCleanupDir.py
    +++ b/Tools/Release/ppCleanupDir.py
    @@ -1,4 +1,4 @@
    -#!/usr/bin/env python
    +#!/usr/bin/env python3
     
     import os
     import shutil
    diff --git a/Tools/Release/release.py b/Tools/Release/release.py
    index 87de82e5a30..8f2b4d9d5dc 100755
    --- a/Tools/Release/release.py
    +++ b/Tools/Release/release.py
    @@ -1,4 +1,4 @@
    -#!/usr/bin/env python
    +#!/usr/bin/env python3
     
     import os
     import shutil
    diff --git a/Tools/libamrex/configure.py b/Tools/libamrex/configure.py
    index ac4b399a471..ebb3cd369f4 100755
    --- a/Tools/libamrex/configure.py
    +++ b/Tools/libamrex/configure.py
    @@ -1,12 +1,6 @@
    -#!/usr/bin/env python
    -
    -from __future__ import print_function
    +#!/usr/bin/env python3
     
     import sys
    -
    -if sys.version_info < (2, 7):
    -    sys.exit("ERROR: need python 2.7 or later for configure.py")
    -
     import argparse
     
     def configure(argv):
    diff --git a/Tools/libamrex/mkconfig.py b/Tools/libamrex/mkconfig.py
    index 30c54f285a2..21f66348891 100755
    --- a/Tools/libamrex/mkconfig.py
    +++ b/Tools/libamrex/mkconfig.py
    @@ -1,12 +1,6 @@
    -#!/usr/bin/env python
    -
    -from __future__ import print_function
    +#!/usr/bin/env python3
     
     import sys, re
    -
    -if sys.version_info < (2, 7):
    -    sys.exit("ERROR: need python 2.7 or later for mkconfig.py")
    -
     import argparse
     
     def doit(defines, undefines, comp, allow_diff_comp):
    diff --git a/Tools/libamrex/mkpkgconfig.py b/Tools/libamrex/mkpkgconfig.py
    index be91e8736a8..c8a626901da 100755
    --- a/Tools/libamrex/mkpkgconfig.py
    +++ b/Tools/libamrex/mkpkgconfig.py
    @@ -1,12 +1,6 @@
    -#!/usr/bin/env python
    -
    -from __future__ import print_function
    +#!/usr/bin/env python3
     
     import sys
    -
    -if sys.version_info < (2, 7):
    -    sys.exit("ERROR: need python 2.7 or later for mkpkgconfig.py")
    -
     import argparse
     
     def doit(prefix, version, cflags, libs, libpriv, fflags):
    diff --git a/Tools/libamrex/mkversionheader.py b/Tools/libamrex/mkversionheader.py
    index f2f6f8865f9..b1dbf0eb2ad 100755
    --- a/Tools/libamrex/mkversionheader.py
    +++ b/Tools/libamrex/mkversionheader.py
    @@ -1,12 +1,6 @@
    -#!/usr/bin/env python
    -
    -from __future__ import print_function
    +#!/usr/bin/env python3
     
     import sys, re
    -
    -if sys.version_info < (2, 7):
    -    sys.exit("ERROR: need python 2.7 or later for mkversionheader.py")
    -
     import argparse
     
     def doit(code, defines):
    diff --git a/Tools/typechecker/typechecker.py b/Tools/typechecker/typechecker.py
    index 2086b22d1b5..6035b7a6c15 100755
    --- a/Tools/typechecker/typechecker.py
    +++ b/Tools/typechecker/typechecker.py
    @@ -1,6 +1,4 @@
    -#!/usr/bin/env python
    -
    -from __future__ import print_function
    +#!/usr/bin/env python3
     
     import os
     import sys