diff --git a/Docs/source/install/hpc/cori.rst b/Docs/source/install/hpc/cori.rst
index 400c413c2af..96eb57137d1 100644
--- a/Docs/source/install/hpc/cori.rst
+++ b/Docs/source/install/hpc/cori.rst
@@ -32,29 +32,9 @@ KNL
We use the following modules and environments on the system (``$HOME/knl_warpx.profile``).
-.. code-block:: bash
-
- module swap craype-haswell craype-mic-knl
- module swap PrgEnv-intel PrgEnv-gnu
- module load cmake/3.21.3
- module switch cray-libsci cray-libsci/20.09.1
- module load cray-hdf5-parallel/1.10.5.2
- module load cray-fftw/3.3.8.4
- module load cray-python/3.7.3.2
-
- export PKG_CONFIG_PATH=$FFTW_DIR/pkgconfig:$PKG_CONFIG_PATH
- export CMAKE_PREFIX_PATH=$HOME/sw/c-blosc-1.12.1-knl-install:$CMAKE_PREFIX_PATH
- export CMAKE_PREFIX_PATH=$HOME/sw/adios2-2.7.1-knl-install:$CMAKE_PREFIX_PATH
- export CMAKE_PREFIX_PATH=$HOME/sw/blaspp-master-knl-install:$CMAKE_PREFIX_PATH
- export CMAKE_PREFIX_PATH=$HOME/sw/lapackpp-master-knl-install:$CMAKE_PREFIX_PATH
-
- if [ -d "$HOME/sw/venvs/knl_warpx" ]
- then
- source $HOME/sw/venvs/knl_warpx/bin/activate
- fi
-
- export CXXFLAGS="-march=knl"
- export CFLAGS="-march=knl"
+.. literalinclude:: ../../../../Tools/machines/cori-nersc/knl_warpx.profile.example
+ :language: bash
+ :caption: You can copy this file from ``Tools/machines/cori-nersc/knl_warpx.profile.example``.
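For example, a minimal sketch of picking up this profile (assuming the WarpX source was cloned to ``$HOME/src/warpx``)::

   cp $HOME/src/warpx/Tools/machines/cori-nersc/knl_warpx.profile.example $HOME/knl_warpx.profile
   source $HOME/knl_warpx.profile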
And install ADIOS2, BLAS++ and LAPACK++:
@@ -105,25 +85,9 @@ Haswell
We use the following modules and environments on the system (``$HOME/haswell_warpx.profile``).
-.. code-block:: bash
-
- module swap PrgEnv-intel PrgEnv-gnu
- module load cmake/3.21.3
- module switch cray-libsci cray-libsci/20.09.1
- module load cray-hdf5-parallel/1.10.5.2
- module load cray-fftw/3.3.8.4
- module load cray-python/3.7.3.2
-
- export PKG_CONFIG_PATH=$FFTW_DIR/pkgconfig:$PKG_CONFIG_PATH
- export CMAKE_PREFIX_PATH=$HOME/sw/c-blosc-1.12.1-haswell-install:$CMAKE_PREFIX_PATH
- export CMAKE_PREFIX_PATH=$HOME/sw/adios2-2.7.1-haswell-install:$CMAKE_PREFIX_PATH
- export CMAKE_PREFIX_PATH=$HOME/sw/blaspp-master-haswell-install:$CMAKE_PREFIX_PATH
- export CMAKE_PREFIX_PATH=$HOME/sw/lapackpp-master-haswell-install:$CMAKE_PREFIX_PATH
-
- if [ -d "$HOME/sw/venvs/haswell_warpx" ]
- then
- source $HOME/sw/venvs/haswell_warpx/bin/activate
- fi
+.. literalinclude:: ../../../../Tools/machines/cori-nersc/haswell_warpx.profile.example
+ :language: bash
+ :caption: You can copy this file from ``Tools/machines/cori-nersc/haswell_warpx.profile.example``.
And install ADIOS2, BLAS++ and LAPACK++:
@@ -174,41 +138,11 @@ GPU (V100)
Cori provides a partition with `18 nodes that include V100 (16 GB) GPUs `__.
We use the following modules and environments on the system (``$HOME/gpu_warpx.profile``).
-.. code-block:: bash
-
- export proj="m1759"
-
- module purge
- module load modules
- module load cgpu
- module load esslurm
- module load gcc/8.3.0 cuda/11.4.0 cmake/3.21.3
- module load openmpi
-
- export CMAKE_PREFIX_PATH=$HOME/sw/c-blosc-1.12.1-gpu-install:$CMAKE_PREFIX_PATH
- export CMAKE_PREFIX_PATH=$HOME/sw/adios2-2.7.1-gpu-install:$CMAKE_PREFIX_PATH
-
- if [ -d "$HOME/sw/venvs/gpu_warpx" ]
- then
- source $HOME/sw/venvs/gpu_warpx/bin/activate
- fi
-
- # compiler environment hints
- export CC=$(which gcc)
- export CXX=$(which g++)
- export FC=$(which gfortran)
- export CUDACXX=$(which nvcc)
- export CUDAHOSTCXX=$(which g++)
-
- # optimize CUDA compilation for V100
- export AMREX_CUDA_ARCH=7.0
-
- # allocate a GPU, e.g. to compile on
- # 10 logical cores (5 physical), 1 GPU
- function getNode() {
- salloc -C gpu -N 1 -t 30 -c 10 --gres=gpu:1 -A $proj
- }
+.. literalinclude:: ../../../../Tools/machines/cori-nersc/gpu_warpx.profile.example
+ :language: bash
+ :caption: You can copy this file from ``Tools/machines/cori-nersc/gpu_warpx.profile.example``.
And install ADIOS2:
@@ -306,14 +240,15 @@ Do not forget to first ``source $HOME/knl_warpx.profile`` if you have not done s
For PICMI Python runs, the ``<executable>`` has to read ``python3`` and the ``<input file>`` is the path to your PICMI input script.
-.. literalinclude:: ../../../../Tools/BatchScripts/batch_cori.sh
+.. literalinclude:: ../../../../Tools/machines/cori-nersc/cori_knl.sbatch
:language: bash
+ :caption: You can copy this file from ``Tools/machines/cori-nersc/cori_knl.sbatch``.
-To run a simulation, copy the lines above to a file ``batch_cori.sh`` and run
+To run a simulation, copy the lines above to a file ``cori_knl.sbatch`` and run
.. code-block:: bash
- sbatch batch_cori.sh
+ sbatch cori_knl.sbatch
to submit the job.
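Equivalently, the script can be taken straight from the repository instead of copy-pasting (a sketch, assuming the source was cloned to ``$HOME/src/warpx``)::

   cp $HOME/src/warpx/Tools/machines/cori-nersc/cori_knl.sbatch .
   # edit the placeholders in chevrons <>, then submit
   sbatch cori_knl.sbatch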
@@ -338,15 +273,16 @@ The batch script below can be used to run a WarpX simulation on 1 `Haswell node
Do not forget to first ``source $HOME/haswell_warpx.profile`` if you have not done so already for this terminal session.
-.. literalinclude:: ../../../../Tools/BatchScripts/batch_cori_haswell.sh
+.. literalinclude:: ../../../../Tools/machines/cori-nersc/cori_haswell.sbatch
:language: bash
+ :caption: You can copy this file from ``Tools/machines/cori-nersc/cori_haswell.sbatch``.
-To run a simulation, copy the lines above to a file ``batch_cori_haswell.sh`` and
+To run a simulation, copy the lines above to a file ``cori_haswell.sbatch`` and
run
.. code-block:: bash
- sbatch batch_cori_haswell.sh
+ sbatch cori_haswell.sbatch
to submit the job.
@@ -367,8 +303,9 @@ For single-node runs, try to run one grid per GPU.
A multi-node batch script template can be found below:
-.. literalinclude:: ../../../../Tools/BatchScripts/batch_cori_gpu.sh
+.. literalinclude:: ../../../../Tools/machines/cori-nersc/cori_gpu.sbatch
:language: bash
+ :caption: You can copy this file from ``Tools/machines/cori-nersc/cori_gpu.sbatch``.
.. _post-processing-cori:
diff --git a/Docs/source/install/hpc/crusher.rst b/Docs/source/install/hpc/crusher.rst
index d09a8b97b78..066e137d589 100644
--- a/Docs/source/install/hpc/crusher.rst
+++ b/Docs/source/install/hpc/crusher.rst
@@ -33,7 +33,7 @@ We use the following modules and environments on the system (``$HOME/crusher_war
.. literalinclude:: ../../../../Tools/machines/crusher-olcf/crusher_warpx.profile.example
:language: bash
-
+ :caption: You can copy this file from ``Tools/machines/crusher-olcf/crusher_warpx.profile.example``.
We recommend to store the above lines in a file, such as ``$HOME/crusher_warpx.profile``, and load it into your shell after a login:
@@ -75,6 +75,7 @@ Or in non-interactive runs:
.. literalinclude:: ../../../../Tools/machines/crusher-olcf/submit.sh
:language: bash
+ :caption: You can copy this file from ``Tools/machines/crusher-olcf/submit.sh``.
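A usage sketch for the non-interactive case (assuming ``submit.sh`` is a Slurm batch script and the source was cloned to ``$HOME/src/warpx``)::

   cp $HOME/src/warpx/Tools/machines/crusher-olcf/submit.sh .
   sbatch submit.sh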
.. _post-processing-crusher:
diff --git a/Docs/source/install/hpc/juwels.rst b/Docs/source/install/hpc/juwels.rst
index 206078dda2d..967e5e72687 100644
--- a/Docs/source/install/hpc/juwels.rst
+++ b/Docs/source/install/hpc/juwels.rst
@@ -31,38 +31,17 @@ Use the following commands to download the WarpX source code and switch to the c
We use the following modules and environments on the system.
-.. code-block:: bash
-
- # please set your project account
- export proj=
-
- # required dependencies
- module load ccache
- module load CMake
- module load GCC
- module load CUDA/11.3
- module load OpenMPI
- module load FFTW
- module load HDF5
- module load Python
-
- # JUWELS' job scheduler may not map ranks to GPUs,
- # so we give a hint to AMReX about the node layout.
- # This is usually done in Make. files in AMReX
- # but there is no such file for JSC yet.
- export GPUS_PER_SOCKET=2
- export GPUS_PER_NODE=4
-
- # optimize CUDA compilation for V100 (7.0) or for A100 (8.0)
- export AMREX_CUDA_ARCH=8.0
+.. literalinclude:: ../../../../Tools/machines/juwels-jsc/juwels_warpx.profile.example
+ :language: bash
+ :caption: You can copy this file from ``Tools/machines/juwels-jsc/juwels_warpx.profile.example``.
Note that for now WarpX must rely on OpenMPI instead of the recommended MPI implementation on this platform MVAPICH2.
-We recommend to store the above lines in a file, such as ``$HOME/warpx.profile``, and load it into your shell after a login:
+We recommend to store the above lines in a file, such as ``$HOME/juwels_warpx.profile``, and load it into your shell after a login:
.. code-block:: bash
- source $HOME/warpx.profile
+ source $HOME/juwels_warpx.profile
Then, ``cd`` into the directory ``$HOME/src/warpx`` and use the following commands to compile:
@@ -98,8 +77,9 @@ The `Juwels GPUs `
An example submission script reads
-.. literalinclude:: ../../../../Tools/BatchScripts/batch_juwels.sh
+.. literalinclude:: ../../../../Tools/machines/juwels-jsc/juwels.sbatch
:language: bash
+ :caption: You can copy this file from ``Tools/machines/juwels-jsc/juwels.sbatch``.
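After sourcing ``$HOME/juwels_warpx.profile``, the script can be submitted with Slurm, for example (a sketch)::

   sbatch juwels.sbatch
   squeue -u $USER   # check the job status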
Queue: batch (2 x Intel Xeon Platinum 8168 CPUs, 24 Cores + 24 Hyperthreads/CPU)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/Docs/source/install/hpc/lassen.rst b/Docs/source/install/hpc/lassen.rst
index 98434979269..caa492d55ff 100644
--- a/Docs/source/install/hpc/lassen.rst
+++ b/Docs/source/install/hpc/lassen.rst
@@ -25,50 +25,11 @@ Use the following commands to download the WarpX source code and switch to the c
git clone https://github.com/ECP-WarpX/WarpX.git $HOME/src/warpx
-We use the following modules and environments on the system (``$HOME/warpx.profile``).
-
-.. code-block:: bash
-
- # please set your project account
- export proj=
-
- # required dependencies
- module load cmake/3.20.2
- module load gcc/8.3.1
- module load cuda/11.2.0
-
- # optional: for PSATD support
- module load fftw/3.3.8
-
- # optional: for QED lookup table generation support
- module load boost/1.70.0
-
- # optional: for openPMD support
- # TODO ADIOS2
- module load hdf5-parallel/1.10.4
-
- # optional: for PSATD in RZ geometry support
- # TODO: blaspp lapackpp
-
- # optional: for Python bindings
- module load python/3.8.2
-
- # optional: an alias to request an interactive node for two hours
- alias getNode="bsub -G $proj -W 2:00 -nnodes 1 -Is /bin/bash"
-
- # fix system defaults: do not escape $ with a \ on tab completion
- shopt -s direxpand
-
- # optimize CUDA compilation for V100
- export AMREX_CUDA_ARCH=7.0
-
- # compiler environment hints
- export CC=$(which gcc)
- export CXX=$(which g++)
- export FC=$(which gfortran)
- export CUDACXX=$(which nvcc)
- export CUDAHOSTCXX=$(which g++)
+We use the following modules and environments on the system (``$HOME/lassen_warpx.profile``).
+
+.. literalinclude:: ../../../../Tools/machines/lassen-llnl/lassen_warpx.profile.example
+ :language: bash
+ :caption: You can copy this file from ``Tools/machines/lassen-llnl/lassen_warpx.profile.example``.
We recommend to store the above lines in a file, such as ``$HOME/lassen_warpx.profile``, and load it into your shell after a login:
@@ -104,14 +65,15 @@ The batch script below can be used to run a WarpX simulation on 2 nodes on the s
Replace descriptions between chevrons ``<>`` by relevant values, for instance ``<input file>`` could be ``plasma_mirror_inputs``.
Note that the only option so far is to run with one MPI rank per GPU.
-.. literalinclude:: ../../../../Tools/BatchScripts/batch_lassen.sh
+.. literalinclude:: ../../../../Tools/machines/lassen-llnl/lassen.bsub
:language: bash
+ :caption: You can copy this file from ``Tools/machines/lassen-llnl/lassen.bsub``.
-To run a simulation, copy the lines above to a file ``batch_lassen.sh`` and run
+To run a simulation, copy the lines above to a file ``lassen.bsub`` and run
.. code-block:: bash
- bsub batch_lassen.sh
+ bsub lassen.bsub
to submit the job.
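A short monitoring sketch after submission (``bjobs`` is the standard LSF status command)::

   bsub lassen.bsub
   bjobs   # list your pending and running jobs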
diff --git a/Docs/source/install/hpc/ookami.rst b/Docs/source/install/hpc/ookami.rst
index b02c780ab7f..91d79c652f8 100644
--- a/Docs/source/install/hpc/ookami.rst
+++ b/Docs/source/install/hpc/ookami.rst
@@ -31,36 +31,9 @@ Use the following commands to download the WarpX source code and switch to the c
We use the following modules and environments on the system (``$HOME/warpx_gcc10.profile``).
-.. code-block:: bash
-
- # please set your project account (not relevant yet)
- #export proj=
-
- # required dependencies
- module load cmake/3.19.0
- module load gcc/10.3.0
- module load openmpi/gcc10/4.1.0
-
- # optional: faster builds (not available yet)
- #module load ccache
- #module load ninja
-
- # optional: for PSATD support (not available yet)
- #module load fftw
-
- # optional: for QED lookup table generation support (not available yet)
- #module load boost
-
- # optional: for openPMD support
- #module load adios2 # not available yet
- #module load hdf5 # only serial
-
- # compiler environment hints
- export CC=$(which gcc)
- export CXX=$(which g++)
- export FC=$(which gfortran)
- export CXXFLAGS="-mcpu=a64fx"
-
+.. literalinclude:: ../../../../Tools/machines/ookami-sbu/ookami_warpx.profile.example
+ :language: bash
+ :caption: You can copy this file from ``Tools/machines/ookami-sbu/ookami_warpx.profile.example``.
We recommend to store the above lines in a file, such as ``$HOME/warpx_gcc10.profile``, and load it into your shell after a login:
@@ -126,3 +99,5 @@ We compiled with the Fujitsu Compiler (Clang) with the following build string:
-DAMReX_MPI_THREAD_MULTIPLE=FALSE \
-DWarpX_COMPUTE=OMP
cmake --build build -j 10
+
+Note that the best performance for A64FX is currently achieved with the GCC or ARM compilers.
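For instance, a GCC-based OpenMP build with the environment from ``warpx_gcc10.profile`` might look like this (a sketch, run from ``$HOME/src/warpx``, using ``-DWarpX_COMPUTE=OMP`` as in the Fujitsu example above)::

   source $HOME/warpx_gcc10.profile
   cmake -S . -B build -DWarpX_COMPUTE=OMP
   cmake --build build -j 10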
diff --git a/Docs/source/install/hpc/perlmutter.rst b/Docs/source/install/hpc/perlmutter.rst
index 1719c86a3ca..c8ff1b6c4e5 100644
--- a/Docs/source/install/hpc/perlmutter.rst
+++ b/Docs/source/install/hpc/perlmutter.rst
@@ -5,8 +5,8 @@ Perlmutter (NERSC)
.. warning::
- Perlmutter is still in acceptance testing.
- This page documents our internal testing workflow only.
+ Perlmutter is still in acceptance testing and environments change often.
+ Please visit this page often for updates and reach out to us if something needs an update.
The `Perlmutter cluster `_ is located at NERSC.
@@ -33,48 +33,9 @@ Use the following commands to download the WarpX source code and switch to the c
We use the following modules and environments on the system (``$HOME/perlmutter_warpx.profile``).
-.. code-block:: bash
-
- # please set your project account
- export proj= # LBNL/AMP: m3906_g
-
- # required dependencies
- module load cmake/3.22.0
- module swap PrgEnv-nvidia PrgEnv-gnu
- module load cudatoolkit
-
- # optional: just an additional text editor
- # module load nano # TODO: request from support
-
- # optional: for openPMD support
- module load cray-hdf5-parallel/1.12.0.7
- export CMAKE_PREFIX_PATH=$HOME/sw/perlmutter/c-blosc-1.21.1:$CMAKE_PREFIX_PATH
- export CMAKE_PREFIX_PATH=$HOME/sw/perlmutter/adios2-2.7.1:$CMAKE_PREFIX_PATH
-
- # optional: Python, ...
- # TODO
-
- # optional: an alias to request an interactive node for two hours
- function getNode() {
- salloc -N 1 --ntasks-per-node=4 -t 2:00:00 -C gpu -c 32 -G 4 -A $proj
- }
-
- # GPU-aware MPI
- export MPICH_GPU_SUPPORT_ENABLED=1
-
- # necessary to use CUDA-Aware MPI and run a job
- export CRAY_ACCEL_TARGET=nvidia80
-
- # optimize CUDA compilation for A100
- export AMREX_CUDA_ARCH=8.0
-
- # compiler environment hints
- export CC=$(which gcc)
- export CXX=$(which g++)
- export FC=$(which gfortran)
- export CUDACXX=$(which nvcc)
- export CUDAHOSTCXX=$(which g++)
-
+.. literalinclude:: ../../../../Tools/machines/perlmutter-nersc/perlmutter_warpx.profile.example
+ :language: bash
+ :caption: You can copy this file from ``Tools/machines/perlmutter-nersc/perlmutter_warpx.profile.example``.
We recommend to store the above lines in a file, such as ``$HOME/perlmutter_warpx.profile``, and load it into your shell after a login:
@@ -126,14 +87,15 @@ Replace descriptions between chevrons ``<>`` by relevant values, for instance ``
Note that we run one MPI rank per GPU.
-.. literalinclude:: ../../../../Tools/BatchScripts/batch_perlmutter.sh
+.. literalinclude:: ../../../../Tools/machines/perlmutter-nersc/perlmutter.sbatch
:language: bash
+ :caption: You can copy this file from ``Tools/machines/perlmutter-nersc/perlmutter.sbatch``.
-To run a simulation, copy the lines above to a file ``batch_perlmutter.sh`` and run
+To run a simulation, copy the lines above to a file ``perlmutter.sbatch`` and run
.. code-block:: bash
- sbatch batch_perlmutter.sh
+ sbatch perlmutter.sbatch
to submit the job.
diff --git a/Docs/source/install/hpc/quartz.rst b/Docs/source/install/hpc/quartz.rst
index 661d43fcd6d..89481aae65d 100644
--- a/Docs/source/install/hpc/quartz.rst
+++ b/Docs/source/install/hpc/quartz.rst
@@ -25,48 +25,11 @@ Use the following commands to download the WarpX source code and switch to the c
git clone https://github.com/ECP-WarpX/WarpX.git $HOME/src/warpx
-We use the following modules and environments on the system (``$HOME/warpx.profile``).
-
-.. code-block:: bash
-
- # please set your project account
- export proj=
-
- # required dependencies
- module load cmake/3.20.2
- module load intel/2021.4
- module load mvapich2/2.3
-
- # optional: for PSATD support
- module load fftw/3.3.8
-
- # optional: for QED lookup table generation support
- module load boost/1.73.0
-
- # optional: for openPMD support
- # TODO ADIOS2
- module load hdf5-parallel/1.10.2
-
- # optional: for PSATD in RZ geometry support
- # TODO: blaspp lapackpp
-
- # optional: for Python bindings
- module load python/3.8.2
-
- # optional: an alias to request an interactive node for two hours
- alias getNode="srun --time=0:30:00 --nodes=1 --ntasks-per-node=2 --cpus-per-task=18 -p pdebug --pty bash"
-
- # fix system defaults: do not escape $ with a \ on tab completion
- shopt -s direxpand
-
- # compiler environment hints
- export CC=$(which icc)
- export CXX=$(which icpc)
- export FC=$(which ifort)
- # we need a newer libstdc++:
- export CFLAGS="-gcc-name=/usr/tce/packages/gcc/gcc-8.3.1/bin/gcc ${CFLAGS}"
- export CXXFLAGS="-gxx-name=/usr/tce/packages/gcc/gcc-8.3.1/bin/g++ ${CXXFLAGS}"
+We use the following modules and environments on the system (``$HOME/quartz_warpx.profile``).
+
+.. literalinclude:: ../../../../Tools/machines/quartz-llnl/quartz_warpx.profile.example
+ :language: bash
+ :caption: You can copy this file from ``Tools/machines/quartz-llnl/quartz_warpx.profile.example``.
We recommend to store the above lines in a file, such as ``$HOME/quartz_warpx.profile``, and load it into your shell after a login:
@@ -99,13 +62,14 @@ Intel Xeon E5-2695 v4 CPUs
The batch script below can be used to run a WarpX simulation on 2 nodes on the supercomputer Quartz at LLNL.
Replace descriptions between chevrons ``<>`` by relevant values, for instance ``<input file>`` could be ``plasma_mirror_inputs``.
-.. literalinclude:: ../../../../Tools/BatchScripts/batch_quartz.sh
+.. literalinclude:: ../../../../Tools/machines/quartz-llnl/quartz.sbatch
:language: bash
+ :caption: You can copy this file from ``Tools/machines/quartz-llnl/quartz.sbatch``.
-To run a simulation, copy the lines above to a file ``batch_quartz.sh`` and run
+To run a simulation, copy the lines above to a file ``quartz.sbatch`` and run
.. code-block:: bash
- sbatch batch_quartz.sh
+ sbatch quartz.sbatch
to submit the job.
diff --git a/Docs/source/install/hpc/spock.rst b/Docs/source/install/hpc/spock.rst
index 03fec913384..7ce78bfb93b 100644
--- a/Docs/source/install/hpc/spock.rst
+++ b/Docs/source/install/hpc/spock.rst
@@ -27,46 +27,17 @@ Use the following commands to download the WarpX source code and switch to the c
git clone https://github.com/ECP-WarpX/WarpX.git $HOME/src/warpx
-We use the following modules and environments on the system (``$HOME/warpx_spock.profile``).
-
-.. code-block:: bash
-
- # please set your project account
- export proj=
-
- # required dependencies
- module load cmake/3.20.2
- module load craype-accel-amd-gfx908
- module load rocm/4.3.0
-
- # optional: faster builds
- module load ccache
- module load ninja
-
- # optional: just an additional text editor
- module load nano
-
- # optional: an alias to request an interactive node for one hour
- alias getNode="salloc -A $proj -J warpx -t 01:00:00 -p ecp -N 1"
-
- # fix system defaults: do not escape $ with a \ on tab completion
- shopt -s direxpand
-
- # optimize CUDA compilation for MI100
- export AMREX_AMD_ARCH=gfx908
-
- # compiler environment hints
- export CC=$ROCM_PATH/llvm/bin/clang
- export CXX=$(which hipcc)
- export LDFLAGS="-L${CRAYLIBS_X86_64} $(CC --cray-print-opts=libs) -lmpi"
- # GPU aware MPI: ${PE_MPICH_GTL_DIR_gfx908} -lmpi_gtl_hsa
+We use the following modules and environments on the system (``$HOME/spock_warpx.profile``).
+
+.. literalinclude:: ../../../../Tools/machines/spock-olcf/spock_warpx.profile.example
+ :language: bash
+ :caption: You can copy this file from ``Tools/machines/spock-olcf/spock_warpx.profile.example``.
-We recommend to store the above lines in a file, such as ``$HOME/warpx_spock.profile``, and load it into your shell after a login:
+We recommend to store the above lines in a file, such as ``$HOME/spock_warpx.profile``, and load it into your shell after a login:
.. code-block:: bash
- source $HOME/warpx_spock.profile
+ source $HOME/spock_warpx.profile
Then, ``cd`` into the directory ``$HOME/src/warpx`` and use the following commands to compile:
@@ -98,10 +69,11 @@ After requesting an interactive node with the ``getNode`` alias above, run a sim
srun -n 4 -c 2 --ntasks-per-node=4 ./warpx inputs
-Or in non-interactive runs:
+Or in non-interactive runs started with ``sbatch``:
-.. literalinclude:: ../../../../Tools/BatchScripts/batch_spock.sh
+.. literalinclude:: ../../../../Tools/machines/spock-olcf/spock_mi100.sbatch
:language: bash
+ :caption: You can copy this file from ``Tools/machines/spock-olcf/spock_mi100.sbatch``.
We can currently use up to ``4`` nodes with ``4`` GPUs each (maximum: ``-N 4 -n 16``).
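A submission sketch for the non-interactive case above (assuming the source was cloned to ``$HOME/src/warpx``)::

   cp $HOME/src/warpx/Tools/machines/spock-olcf/spock_mi100.sbatch .
   sbatch spock_mi100.sbatch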
diff --git a/Docs/source/install/hpc/summit.rst b/Docs/source/install/hpc/summit.rst
index b1ba3c63ebf..643c241918f 100644
--- a/Docs/source/install/hpc/summit.rst
+++ b/Docs/source/install/hpc/summit.rst
@@ -28,93 +28,18 @@ Use the following commands to download the WarpX source code and switch to the c
git clone https://github.com/ECP-WarpX/WarpX.git $HOME/src/warpx
-We use the following modules and environments on the system (``$HOME/warpx.profile``).
+We use the following modules and environments on the system (``$HOME/summit_warpx.profile``).
-.. code-block:: bash
-
- # please set your project account
- export proj=
-
- # optional: just an additional text editor
- module load nano
-
- # required dependencies
- module load cmake/3.20.2
- module load gcc/9.3.0
- module load cuda/11.3.1
-
- # optional: faster re-builds
- module load ccache
-
- # optional: for PSATD in RZ geometry support
- module load blaspp/2021.04.01
- module load lapackpp/2021.04.00
-
- # optional: for PSATD support (CPU only)
- #module load fftw/3.3.9
-
- # optional: for QED lookup table generation support
- module load boost/1.76.0
-
- # optional: for openPMD support
- module load adios2/2.7.1
- module load hdf5/1.10.7
-
- # optional: for openPMD support (GNUmake only)
- #module load ums
- #module load ums-aph114
- #module load openpmd-api/0.14.2
-
- # often unstable at runtime with dependencies
- module unload darshan-runtime
-
- # optional: Ascent in situ support
- # note: build WarpX with CMake
- export Ascent_DIR=/gpfs/alpine/csc340/world-shared/software/ascent/2021_09_01_gcc_9_3_0_warpx/summit/cuda/gnu/ascent-install
-
- # optional: for Python bindings or libEnsemble
- module load python/3.8.10
- module load freetype/2.10.4 # matplotlib
-
- # dependencies for numpy, blaspp & lapackpp
- module load openblas/0.3.5-omp
- export BLAS=${OLCF_OPENBLAS_ROOT}/lib/libopenblas.so
- export LAPACK=${OLCF_OPENBLAS_ROOT}/lib/libopenblas.so
-
- if [ -d "$HOME/sw/venvs/warpx" ]
- then
- source $HOME/sw/venvs/warpx/bin/activate
- fi
-
- # an alias to request an interactive batch node for two hours
- # for paralle execution, start on the batch node: jsrun
- alias getNode="bsub -q debug -P $proj -W 2:00 -nnodes 1 -Is /bin/bash"
- # an alias to run a command on a batch node for up to 30min
- # usage: nrun
- alias runNode="bsub -q debug -P $proj -W 0:30 -nnodes 1 -I"
-
- # fix system defaults: do not escape $ with a \ on tab completion
- shopt -s direxpand
-
- # make output group-readable by default
- umask 0027
-
- # optimize CUDA compilation for V100
- export AMREX_CUDA_ARCH=7.0
-
- # compiler environment hints
- export CC=$(which gcc)
- export CXX=$(which g++)
- export FC=$(which gfortran)
- export CUDACXX=$(which nvcc)
- export CUDAHOSTCXX=$(which g++)
+.. literalinclude:: ../../../../Tools/machines/summit-olcf/summit_warpx.profile.example
+ :language: bash
+ :caption: You can copy this file from ``Tools/machines/summit-olcf/summit_warpx.profile.example``.
-We recommend to store the above lines in a file, such as ``$HOME/warpx.profile``, and load it into your shell after a login:
+We recommend to store the above lines in a file, such as ``$HOME/summit_warpx.profile``, and load it into your shell after a login:
.. code-block:: bash
- source $HOME/warpx.profile
+ source $HOME/summit_warpx.profile
Optionally, download and install Python packages for :ref:`PICMI ` or dynamic ensemble optimizations (:ref:`libEnsemble `):
@@ -176,17 +101,18 @@ V100 GPUs (16GB)
The batch script below can be used to run a WarpX simulation on 2 nodes on
the supercomputer Summit at OLCF. Replace descriptions between chevrons ``<>``
by relevant values, for instance ``<input file>`` could be
-``plasma_mirror_inputs``. Note that the only option so far is to run with one
-MPI rank per GPU.
+``plasma_mirror_inputs``.
+Note that WarpX runs with one MPI rank per GPU and there are 6 GPUs per node:
-.. literalinclude:: ../../../../Tools/BatchScripts/batch_summit.sh
+.. literalinclude:: ../../../../Tools/machines/summit-olcf/summit_v100.bsub
:language: bash
+ :caption: You can copy this file from ``Tools/machines/summit-olcf/summit_v100.bsub``.
-To run a simulation, copy the lines above to a file ``batch_summit.sh`` and
+To run a simulation, copy the lines above to a file ``summit_v100.bsub`` and
run
::
- bsub batch_summit.sh
+ bsub summit_v100.bsub
to submit the job.
@@ -203,7 +129,7 @@ regime), the following set of parameters provided good performance:
* **Two `128x128x128` grids per GPU**, or **one `128x128x256` grid per GPU**.
A batch script with more options regarding profiling on Summit can be found at
-:download:`Summit batch script <../../../../Tools/BatchScripts/script_profiling_summit.sh>`
+:download:`Summit batch script <../../../../Tools/machines/summit-olcf/summit_profiling.bsub>`
.. _running-cpp-summit-Power9-CPUs:
@@ -214,8 +140,9 @@ Similar to above, the batch script below can be used to run a WarpX simulation o
1 node on the supercomputer Summit at OLCF, on Power9 CPUs (i.e., the GPUs are
ignored).
-.. literalinclude:: ../../../../Tools/BatchScripts/batch_summit_power9.sh
+.. literalinclude:: ../../../../Tools/machines/summit-olcf/summit_power9.bsub
:language: bash
+ :caption: You can copy this file from ``Tools/machines/summit-olcf/summit_power9.bsub``.
For a 3D simulation with a few (1-4) particles per cell using FDTD Maxwell
solver on Summit for a well load-balanced problem, the following set of
diff --git a/Tools/BatchScripts/batch_cori_gpu.sh b/Tools/machines/cori-nersc/cori_gpu.sbatch
similarity index 100%
rename from Tools/BatchScripts/batch_cori_gpu.sh
rename to Tools/machines/cori-nersc/cori_gpu.sbatch
diff --git a/Tools/BatchScripts/batch_cori_haswell.sh b/Tools/machines/cori-nersc/cori_haswell.sbatch
similarity index 100%
rename from Tools/BatchScripts/batch_cori_haswell.sh
rename to Tools/machines/cori-nersc/cori_haswell.sbatch
diff --git a/Tools/BatchScripts/batch_cori.sh b/Tools/machines/cori-nersc/cori_knl.sbatch
similarity index 100%
rename from Tools/BatchScripts/batch_cori.sh
rename to Tools/machines/cori-nersc/cori_knl.sbatch
diff --git a/Tools/machines/cori-nersc/gpu_warpx.profile.example b/Tools/machines/cori-nersc/gpu_warpx.profile.example
new file mode 100644
index 00000000000..7bf4ba96771
--- /dev/null
+++ b/Tools/machines/cori-nersc/gpu_warpx.profile.example
@@ -0,0 +1,32 @@
+export proj="m1759"
+
+module purge
+module load modules
+module load cgpu
+module load esslurm
+module load gcc/8.3.0 cuda/11.4.0 cmake/3.21.3
+module load openmpi
+
+export CMAKE_PREFIX_PATH=$HOME/sw/c-blosc-1.12.1-gpu-install:$CMAKE_PREFIX_PATH
+export CMAKE_PREFIX_PATH=$HOME/sw/adios2-2.7.1-gpu-install:$CMAKE_PREFIX_PATH
+
+if [ -d "$HOME/sw/venvs/gpu_warpx" ]
+then
+ source $HOME/sw/venvs/gpu_warpx/bin/activate
+fi
+
+# compiler environment hints
+export CC=$(which gcc)
+export CXX=$(which g++)
+export FC=$(which gfortran)
+export CUDACXX=$(which nvcc)
+export CUDAHOSTCXX=$(which g++)
+
+# optimize CUDA compilation for V100
+export AMREX_CUDA_ARCH=7.0
+
+# allocate a GPU, e.g. to compile on
+# 10 logical cores (5 physical), 1 GPU
+function getNode() {
+ salloc -C gpu -N 1 -t 30 -c 10 --gres=gpu:1 -A $proj
+}
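A usage sketch for the ``getNode`` helper above, once the profile has been sourced::

   source $HOME/gpu_warpx.profile
   getNode   # request 1 GPU and 10 logical cores for 30 minutes (e.g. to compile on)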
diff --git a/Tools/machines/cori-nersc/haswell_warpx.profile.example b/Tools/machines/cori-nersc/haswell_warpx.profile.example
new file mode 100644
index 00000000000..37a958e64a2
--- /dev/null
+++ b/Tools/machines/cori-nersc/haswell_warpx.profile.example
@@ -0,0 +1,17 @@
+module swap PrgEnv-intel PrgEnv-gnu
+module load cmake/3.21.3
+module switch cray-libsci cray-libsci/20.09.1
+module load cray-hdf5-parallel/1.10.5.2
+module load cray-fftw/3.3.8.4
+module load cray-python/3.7.3.2
+
+export PKG_CONFIG_PATH=$FFTW_DIR/pkgconfig:$PKG_CONFIG_PATH
+export CMAKE_PREFIX_PATH=$HOME/sw/c-blosc-1.12.1-haswell-install:$CMAKE_PREFIX_PATH
+export CMAKE_PREFIX_PATH=$HOME/sw/adios2-2.7.1-haswell-install:$CMAKE_PREFIX_PATH
+export CMAKE_PREFIX_PATH=$HOME/sw/blaspp-master-haswell-install:$CMAKE_PREFIX_PATH
+export CMAKE_PREFIX_PATH=$HOME/sw/lapackpp-master-haswell-install:$CMAKE_PREFIX_PATH
+
+if [ -d "$HOME/sw/venvs/haswell_warpx" ]
+then
+ source $HOME/sw/venvs/haswell_warpx/bin/activate
+fi
diff --git a/Tools/machines/cori-nersc/knl_warpx.profile.example b/Tools/machines/cori-nersc/knl_warpx.profile.example
new file mode 100644
index 00000000000..b9995650c22
--- /dev/null
+++ b/Tools/machines/cori-nersc/knl_warpx.profile.example
@@ -0,0 +1,21 @@
+module swap craype-haswell craype-mic-knl
+module swap PrgEnv-intel PrgEnv-gnu
+module load cmake/3.21.3
+module switch cray-libsci cray-libsci/20.09.1
+module load cray-hdf5-parallel/1.10.5.2
+module load cray-fftw/3.3.8.4
+module load cray-python/3.7.3.2
+
+export PKG_CONFIG_PATH=$FFTW_DIR/pkgconfig:$PKG_CONFIG_PATH
+export CMAKE_PREFIX_PATH=$HOME/sw/c-blosc-1.12.1-knl-install:$CMAKE_PREFIX_PATH
+export CMAKE_PREFIX_PATH=$HOME/sw/adios2-2.7.1-knl-install:$CMAKE_PREFIX_PATH
+export CMAKE_PREFIX_PATH=$HOME/sw/blaspp-master-knl-install:$CMAKE_PREFIX_PATH
+export CMAKE_PREFIX_PATH=$HOME/sw/lapackpp-master-knl-install:$CMAKE_PREFIX_PATH
+
+if [ -d "$HOME/sw/venvs/knl_warpx" ]
+then
+ source $HOME/sw/venvs/knl_warpx/bin/activate
+fi
+
+export CXXFLAGS="-march=knl"
+export CFLAGS="-march=knl"
diff --git a/Tools/BatchScripts/batch_juwels.sh b/Tools/machines/juwels-jsc/juwels.sbatch
similarity index 100%
rename from Tools/BatchScripts/batch_juwels.sh
rename to Tools/machines/juwels-jsc/juwels.sbatch
diff --git a/Tools/machines/juwels-jsc/juwels_warpx.profile.example b/Tools/machines/juwels-jsc/juwels_warpx.profile.example
new file mode 100644
index 00000000000..c5c71a822b4
--- /dev/null
+++ b/Tools/machines/juwels-jsc/juwels_warpx.profile.example
@@ -0,0 +1,22 @@
+# please set your project account
+#export proj=
+
+# required dependencies
+module load ccache
+module load CMake
+module load GCC
+module load CUDA/11.3
+module load OpenMPI
+module load FFTW
+module load HDF5
+module load Python
+
+# JUWELS' job scheduler may not map ranks to GPUs,
+# so we give a hint to AMReX about the node layout.
+# This is usually done in Make.<machine> files in AMReX
+# but there is no such file for JSC yet.
+export GPUS_PER_SOCKET=2
+export GPUS_PER_NODE=4
+
+# optimize CUDA compilation for V100 (7.0) or for A100 (8.0)
+export AMREX_CUDA_ARCH=8.0
diff --git a/Tools/BatchScripts/batch_lassen.sh b/Tools/machines/lassen-llnl/lassen.bsub
similarity index 100%
rename from Tools/BatchScripts/batch_lassen.sh
rename to Tools/machines/lassen-llnl/lassen.bsub
diff --git a/Tools/machines/lassen-llnl/lassen_warpx.profile.example b/Tools/machines/lassen-llnl/lassen_warpx.profile.example
new file mode 100644
index 00000000000..8573ab191e3
--- /dev/null
+++ b/Tools/machines/lassen-llnl/lassen_warpx.profile.example
@@ -0,0 +1,39 @@
+# please set your project account
+#export proj=
+
+# required dependencies
+module load cmake/3.20.2
+module load gcc/8.3.1
+module load cuda/11.2.0
+
+# optional: for PSATD support
+module load fftw/3.3.8
+
+# optional: for QED lookup table generation support
+module load boost/1.70.0
+
+# optional: for openPMD support
+# TODO ADIOS2
+module load hdf5-parallel/1.10.4
+
+# optional: for PSATD in RZ geometry support
+# TODO: blaspp lapackpp
+
+# optional: for Python bindings
+module load python/3.8.2
+
+# optional: an alias to request an interactive node for two hours
+alias getNode="bsub -G $proj -W 2:00 -nnodes 1 -Is /bin/bash"
+
+# fix system defaults: do not escape $ with a \ on tab completion
+shopt -s direxpand
+
+# optimize CUDA compilation for V100
+export AMREX_CUDA_ARCH=7.0
+
+# compiler environment hints
+export CC=$(which gcc)
+export CXX=$(which g++)
+export FC=$(which gfortran)
+export CUDACXX=$(which nvcc)
+export CUDAHOSTCXX=$(which g++)
diff --git a/Tools/machines/ookami-sbu/ookami_warpx.profile.example b/Tools/machines/ookami-sbu/ookami_warpx.profile.example
new file mode 100644
index 00000000000..321e3ce1d59
--- /dev/null
+++ b/Tools/machines/ookami-sbu/ookami_warpx.profile.example
@@ -0,0 +1,27 @@
+# please set your project account (not relevant yet)
+#export proj=
+
+# required dependencies
+module load cmake/3.19.0
+module load gcc/10.3.0
+module load openmpi/gcc10/4.1.0
+
+# optional: faster builds (not available yet)
+#module load ccache
+#module load ninja
+
+# optional: for PSATD support (not available yet)
+#module load fftw
+
+# optional: for QED lookup table generation support (not available yet)
+#module load boost
+
+# optional: for openPMD support
+#module load adios2 # not available yet
+#module load hdf5 # only serial
+
+# compiler environment hints
+export CC=$(which gcc)
+export CXX=$(which g++)
+export FC=$(which gfortran)
+export CXXFLAGS="-mcpu=a64fx"
diff --git a/Tools/BatchScripts/batch_perlmutter.sh b/Tools/machines/perlmutter-nersc/perlmutter.sbatch
similarity index 100%
rename from Tools/BatchScripts/batch_perlmutter.sh
rename to Tools/machines/perlmutter-nersc/perlmutter.sbatch
diff --git a/Tools/machines/perlmutter-nersc/perlmutter_warpx.profile.example b/Tools/machines/perlmutter-nersc/perlmutter_warpx.profile.example
new file mode 100644
index 00000000000..610a63c334d
--- /dev/null
+++ b/Tools/machines/perlmutter-nersc/perlmutter_warpx.profile.example
@@ -0,0 +1,39 @@
+# please set your project account
+#export proj= # LBNL/AMP: m3906_g
+
+# required dependencies
+module load cmake/3.22.0
+module swap PrgEnv-nvidia PrgEnv-gnu
+module load cudatoolkit
+
+# optional: just an additional text editor
+# module load nano # TODO: request from support
+
+# optional: for openPMD support
+module load cray-hdf5-parallel/1.12.0.7
+export CMAKE_PREFIX_PATH=$HOME/sw/perlmutter/c-blosc-1.21.1:$CMAKE_PREFIX_PATH
+export CMAKE_PREFIX_PATH=$HOME/sw/perlmutter/adios2-2.7.1:$CMAKE_PREFIX_PATH
+
+# optional: Python, ...
+# TODO
+
+# optional: an alias to request an interactive node for two hours
+function getNode() {
+ salloc -N 1 --ntasks-per-node=4 -t 2:00:00 -C gpu -c 32 -G 4 -A $proj
+}
+
+# GPU-aware MPI
+export MPICH_GPU_SUPPORT_ENABLED=1
+
+# necessary to use CUDA-Aware MPI and run a job
+export CRAY_ACCEL_TARGET=nvidia80
+
+# optimize CUDA compilation for A100
+export AMREX_CUDA_ARCH=8.0
+
+# compiler environment hints
+export CC=$(which gcc)
+export CXX=$(which g++)
+export FC=$(which gfortran)
+export CUDACXX=$(which nvcc)
+export CUDAHOSTCXX=$(which g++)
diff --git a/Tools/BatchScripts/batch_quartz.sh b/Tools/machines/quartz-llnl/quartz.sbatch
similarity index 100%
rename from Tools/BatchScripts/batch_quartz.sh
rename to Tools/machines/quartz-llnl/quartz.sbatch
diff --git a/Tools/machines/quartz-llnl/quartz_warpx.profile.example b/Tools/machines/quartz-llnl/quartz_warpx.profile.example
new file mode 100644
index 00000000000..370e4a601ac
--- /dev/null
+++ b/Tools/machines/quartz-llnl/quartz_warpx.profile.example
@@ -0,0 +1,37 @@
+# please set your project account
+#export proj=
+
+# required dependencies
+module load cmake/3.20.2
+module load intel/2021.4
+module load mvapich2/2.3
+
+# optional: for PSATD support
+module load fftw/3.3.8
+
+# optional: for QED lookup table generation support
+module load boost/1.73.0
+
+# optional: for openPMD support
+# TODO ADIOS2
+module load hdf5-parallel/1.10.2
+
+# optional: for PSATD in RZ geometry support
+# TODO: blaspp lapackpp
+
+# optional: for Python bindings
+module load python/3.8.2
+
+# optional: an alias to request an interactive node for two hours
+alias getNode="srun --time=0:30:00 --nodes=1 --ntasks-per-node=2 --cpus-per-task=18 -p pdebug --pty bash"
+
+# fix system defaults: do not escape $ with a \ on tab completion
+shopt -s direxpand
+
+# compiler environment hints
+export CC=$(which icc)
+export CXX=$(which icpc)
+export FC=$(which ifort)
+# we need a newer libstdc++:
+export CFLAGS="-gcc-name=/usr/tce/packages/gcc/gcc-8.3.1/bin/gcc ${CFLAGS}"
+export CXXFLAGS="-gxx-name=/usr/tce/packages/gcc/gcc-8.3.1/bin/g++ ${CXXFLAGS}"
diff --git a/Tools/BatchScripts/batch_spock.sh b/Tools/machines/spock-olcf/spock_mi100.sbatch
similarity index 100%
rename from Tools/BatchScripts/batch_spock.sh
rename to Tools/machines/spock-olcf/spock_mi100.sbatch
diff --git a/Tools/machines/spock-olcf/spock_warpx.profile.example b/Tools/machines/spock-olcf/spock_warpx.profile.example
new file mode 100644
index 00000000000..6b10656c3bf
--- /dev/null
+++ b/Tools/machines/spock-olcf/spock_warpx.profile.example
@@ -0,0 +1,29 @@
+# please set your project account
+#export proj=
+
+# required dependencies
+module load cmake/3.20.2
+module load craype-accel-amd-gfx908
+module load rocm/4.3.0
+
+# optional: faster builds
+module load ccache
+module load ninja
+
+# optional: just an additional text editor
+module load nano
+
+# optional: an alias to request an interactive node for one hour
+alias getNode="salloc -A $proj -J warpx -t 01:00:00 -p ecp -N 1"
+
+# fix system defaults: do not escape $ with a \ on tab completion
+shopt -s direxpand
+
+# optimize CUDA compilation for MI100
+export AMREX_AMD_ARCH=gfx908
+
+# compiler environment hints
+export CC=$ROCM_PATH/llvm/bin/clang
+export CXX=$(which hipcc)
+export LDFLAGS="-L${CRAYLIBS_X86_64} $(CC --cray-print-opts=libs) -lmpi"
+# GPU aware MPI: ${PE_MPICH_GTL_DIR_gfx908} -lmpi_gtl_hsa
diff --git a/Tools/BatchScripts/batch_summit_power9.sh b/Tools/machines/summit-olcf/summit_power9.bsub
similarity index 100%
rename from Tools/BatchScripts/batch_summit_power9.sh
rename to Tools/machines/summit-olcf/summit_power9.bsub
diff --git a/Tools/BatchScripts/script_profiling_summit.sh b/Tools/machines/summit-olcf/summit_profiling.bsub
similarity index 100%
rename from Tools/BatchScripts/script_profiling_summit.sh
rename to Tools/machines/summit-olcf/summit_profiling.bsub
diff --git a/Tools/BatchScripts/batch_summit.sh b/Tools/machines/summit-olcf/summit_v100.bsub
similarity index 100%
rename from Tools/BatchScripts/batch_summit.sh
rename to Tools/machines/summit-olcf/summit_v100.bsub
diff --git a/Tools/machines/summit-olcf/summit_warpx.profile.example b/Tools/machines/summit-olcf/summit_warpx.profile.example
new file mode 100644
index 00000000000..50e3993f064
--- /dev/null
+++ b/Tools/machines/summit-olcf/summit_warpx.profile.example
@@ -0,0 +1,76 @@
+# please set your project account
+#export proj=
+
+# optional: just an additional text editor
+module load nano
+
+# required dependencies
+module load cmake/3.20.2
+module load gcc/9.3.0
+module load cuda/11.3.1
+
+# optional: faster re-builds
+module load ccache
+
+# optional: for PSATD in RZ geometry support
+module load blaspp/2021.04.01
+module load lapackpp/2021.04.00
+
+# optional: for PSATD support (CPU only)
+#module load fftw/3.3.9
+
+# optional: for QED lookup table generation support
+module load boost/1.76.0
+
+# optional: for openPMD support
+module load adios2/2.7.1
+module load hdf5/1.10.7
+
+# optional: for openPMD support (GNUmake only)
+#module load ums
+#module load ums-aph114
+#module load openpmd-api/0.14.2
+
+# often unstable at runtime with dependencies
+module unload darshan-runtime
+
+# optional: Ascent in situ support
+# note: build WarpX with CMake
+export Ascent_DIR=/gpfs/alpine/csc340/world-shared/software/ascent/2021_09_01_gcc_9_3_0_warpx/summit/cuda/gnu/ascent-install
+
+# optional: for Python bindings or libEnsemble
+module load python/3.8.10
+module load freetype/2.10.4 # matplotlib
+
+# dependencies for numpy, blaspp & lapackpp
+module load openblas/0.3.5-omp
+export BLAS=${OLCF_OPENBLAS_ROOT}/lib/libopenblas.so
+export LAPACK=${OLCF_OPENBLAS_ROOT}/lib/libopenblas.so
+
+if [ -d "$HOME/sw/venvs/warpx" ]
+then
+ source $HOME/sw/venvs/warpx/bin/activate
+fi
+
+# an alias to request an interactive batch node for two hours
+# for parallel execution, start on the batch node: jsrun
+alias getNode="bsub -q debug -P $proj -W 2:00 -nnodes 1 -Is /bin/bash"
+# an alias to run a command on a batch node for up to 30min
+# usage: runNode <command>
+alias runNode="bsub -q debug -P $proj -W 0:30 -nnodes 1 -I"
+
+# fix system defaults: do not escape $ with a \ on tab completion
+shopt -s direxpand
+
+# make output group-readable by default
+umask 0027
+
+# optimize CUDA compilation for V100
+export AMREX_CUDA_ARCH=7.0
+
+# compiler environment hints
+export CC=$(which gcc)
+export CXX=$(which g++)
+export FC=$(which gfortran)
+export CUDACXX=$(which nvcc)
+export CUDAHOSTCXX=$(which g++)
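A usage sketch for the interactive helpers above (after setting ``proj`` and sourcing the profile)::

   source $HOME/summit_warpx.profile
   runNode jsrun -n 6 hostname   # run a quick command on a batch node (up to 30 min)
   getNode                       # or: open an interactive shell on a node for two hours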