Improve SIMD loading/storing #3771
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow-level settings: display name, triggers, concurrency and shared environment.
name: CI

# Triggered by every push, every pull request, and once a day by the cron schedule below.
on:
  push:
  pull_request:
  schedule:
    - cron: "0 3 * * *"

# Runs are grouped per workflow and pull-request number (falling back to the run id when
# there is no pull request); a newer run in the same group cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.run_id }}
  cancel-in-progress: true

# Shared by all jobs: THREADS is the parallel build/analysis width (`-j $THREADS`),
# CONFIG is the CMake build configuration.
env:
  THREADS: 2
  CONFIG: RelWithDebInfo

jobs:
# Job: format the sources with clang-format 16 and commit whatever changed.
# All other jobs in this workflow declare `needs: clang-format`.
clang-format:
  runs-on: ubuntu-22.04
  steps:
    - uses: actions/checkout@v3

    # Reformat C++/CUDA sources in place; ./thirdparty is excluded.
    - uses: DoozyX/clang-format-lint-action@v0.16.2
      with:
        exclude: "./thirdparty"
        extensions: "cpp,hpp,h,cu"
        clangFormatVersion: 16
        inplace: True

    # Show the formatter's changes in the job log.
    - name: git diff
      run: git diff

    # Commit the reformatted files using the workflow token.
    - uses: EndBug/add-and-commit@v9
      with:
        author_name: Third Party
        author_email: llama@hzdr.de
        message: "Run clang-format"
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# Job: static analysis with clang-tidy 16.
# The project is configured only to produce compile_commands.json; run-clang-tidy-16 then
# analyses the file subset selected by the matrix entry.
clang-tidy:
  needs: clang-format
  runs-on: ubuntu-22.04
  env:
    CXX: clang++-16
  strategy:
    fail-fast: false
    matrix:
      # Each `files` value is passed as the file filter to run-clang-tidy-16,
      # so the analysis is split over three parallel jobs.
      include:
        - files: '../tests/m.*'
        - files: '../tests/[^m].*'
        - files: '../examples'
  steps:
    - uses: actions/checkout@v3
    - name: add LLVM apt repo
      run: |
        wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key|sudo apt-key add -
        sudo add-apt-repository 'deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-16 main'
    - name: install clang-16
      run: |
        sudo apt install clang-16 libomp-16-dev clang-tidy-16
    # CMAKE_BUILD_TYPE is passed exactly once (the original command line repeated it).
    - name: cmake
      run: |
        mkdir build
        cd build
        cmake .. -DCMAKE_BUILD_TYPE=$CONFIG \
                 -DBUILD_TESTING=ON \
                 -DLLAMA_BUILD_EXAMPLES=ON \
                 -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
                 -Dalpaka_ACC_CPU_B_SEQ_T_SEQ_ENABLE=ON \
                 -Dalpaka_ACC_CPU_DISABLE_ATOMIC_REF=ON \
                 -Dalpaka_CXX_STANDARD=17 \
                 -DCMAKE_TOOLCHAIN_FILE=$VCPKG_INSTALLATION_ROOT/scripts/buildsystems/vcpkg.cmake
    - name: run clang-tidy
      run: |
        cd build
        sed -i 's/\(-forward-unknown-to-host-compiler\|--generate-code=arch=[^ ]\+\|--expt-extended-lambda\|--extended-lambda\|--expt-relaxed-constexpr\|--use_fast_math\)//g' compile_commands.json # remove NVCC specific flags which clang cannot handle
        run-clang-tidy-16 -j $THREADS -header-filter='(tests|include/llama|examples)' -extra-arg=--no-cuda-version-check -extra-arg=-nocudalib -extra-arg=-Wno-unused-command-line-argument ${{ matrix.files }}
# Job: build the tests with coverage instrumentation (g++, Debug), run them,
# capture an lcov report and upload it to Codecov.
coverage:
  needs: clang-format
  runs-on: ubuntu-22.04
  env:
    CXX: g++
  steps:
    - uses: actions/checkout@v3
      with:
        # NOTE(review): presumably the coverage upload needs the parent commit — confirm.
        fetch-depth: 2
    - name: install lcov
      run: |
        sudo apt install lcov
    # The vcpkg toolchain file is located through $VCPKG_INSTALLATION_ROOT, the same way
    # the other jobs of this workflow do it, instead of a hard-coded absolute path.
    - name: cmake
      run: |
        mkdir build
        cd build
        cmake .. -DCMAKE_BUILD_TYPE=Debug \
                 -DBUILD_TESTING=ON \
                 -DLLAMA_BUILD_EXAMPLES=OFF \
                 -DLLAMA_ENABLE_COVERAGE_FOR_TESTS=ON \
                 -DCMAKE_TOOLCHAIN_FILE=$VCPKG_INSTALLATION_ROOT/scripts/buildsystems/vcpkg.cmake
    - name: build tests
      run: |
        cmake --build build -j $THREADS
    - name: run tests
      run: |
        build/tests
    - name: generate coverage report
      run: |
        lcov --capture --directory build --output-file coverage.info
        #lcov --remove coverage.info '/usr/*' --output-file coverage.info
        #lcov --list coverage.info
    # An upload failure fails the job (fail_ci_if_error).
    - name: upload coverage report
      uses: codecov/codecov-action@v3
      with:
        token: ${{ secrets.CODECOV_TOKEN }}
        fail_ci_if_error: true
        verbose: true
# Job: generate the single-header llama.hpp, check that an example compiles against it,
# and publish the header as a build artifact.
amalgamation:
  needs: clang-format
  runs-on: ubuntu-22.04
  env:
    CXX: g++
  steps:
    - uses: actions/checkout@v3

    - name: create llama.hpp
      run: ./tools/create-single-header.sh

    # installs manifest in a folder called `vcpkg_installed` next to the manifest
    - run: vcpkg install

    # Compile the heatequation example with only the generated header (copied into
    # ./llama) and the vcpkg-installed headers on the include path.
    - name: test llama.hpp
      run: |
        mkdir build
        cd build
        mkdir llama
        cp -p ../single-header/llama.hpp llama
        $CXX -std=c++20 -I../vcpkg_installed/x64-linux/include -I. ../examples/heatequation/heatequation.cpp

    # The job fails if the header is missing (if-no-files-found: error).
    - name: upload llama.hpp
      uses: actions/upload-artifact@v3
      with:
        name: "llama.hpp"
        path: "single-header/llama.hpp"
        if-no-files-found: error
# Job: build and run tests + examples on Ubuntu for a matrix of compilers
# (GCC, Clang, Intel icpx, NVIDIA nvc++), optionally with a CUDA toolkit and/or ASan.
#
# Matrix keys read by this job (all optional except `name` and `cxx`):
#   cxx                 host C++ compiler, exported as CXX
#   cudacxx             CUDA compiler, exported as CUDACXX (nvcc is searched for if empty)
#   cuda_url            CUDA toolkit installer to download; also enables the CUDA backend
#   asan                value for LLAMA_ENABLE_ASAN_FOR_TESTS (defaults to OFF)
#   install_extra       extra apt packages to install
#   add_toolchain_repo / add_llvm_repo / add_oneapi_repo / add_nvcpp_repo
#                       which additional apt repositories to register
#   cxx_flags           extra CMAKE_CXX_FLAGS
#   cxx_std             alpaka_CXX_STANDARD (defaults to 17)
#   os, cuda_flags      read below but not set by any current matrix entry
build-ubuntu:
  needs: clang-format
  runs-on: ${{ matrix.os || 'ubuntu-22.04' }}
  env:
    CXX: ${{ matrix.cxx }}
    CUDACXX: ${{ matrix.cudacxx }}
  name: ${{ matrix.name }}
  strategy:
    fail-fast: false
    matrix:
      include:
        - name: build-ubuntu-gcc9
          cxx: g++-9
        - name: build-ubuntu-gcc10
          cxx: g++-10
        - name: build-ubuntu-gcc10-nvcc11.6
          cxx: g++-10
          cuda_url: https://developer.download.nvidia.com/compute/cuda/11.6.2/local_installers/cuda_11.6.2_510.47.03_linux.run
        # `asan` values are quoted so they stay the string ON under any YAML parser.
        - name: build-ubuntu-gcc11-asan
          cxx: g++-11
          asan: 'ON'
        - name: build-ubuntu-gcc11-nvcc11.7
          cxx: g++-11
          cuda_url: https://developer.download.nvidia.com/compute/cuda/11.7.1/local_installers/cuda_11.7.1_515.65.01_linux.run
        - name: build-ubuntu-gcc11-nvcc11.8
          cxx: g++-11
          cuda_url: https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run
        - name: build-ubuntu-gcc12
          cxx: g++-12
          install_extra: g++-12
        - name: build-ubuntu-gcc12-nvcc12.0
          cxx: g++-12
          install_extra: g++-12
          cuda_url: https://developer.download.nvidia.com/compute/cuda/12.0.1/local_installers/cuda_12.0.1_525.85.12_linux.run
        - name: build-ubuntu-gcc12-nvcc12.1
          cxx: g++-12
          install_extra: g++-12
          cuda_url: https://developer.download.nvidia.com/compute/cuda/12.1.1/local_installers/cuda_12.1.1_530.30.02_linux.run
        - name: build-ubuntu-gcc12-nvcc12.2-asan
          cxx: g++-12
          install_extra: g++-12
          cuda_url: https://developer.download.nvidia.com/compute/cuda/12.2.2/local_installers/cuda_12.2.2_535.104.05_linux.run
          asan: 'ON'
        - name: build-ubuntu-gcc12-nvcc12.3
          cxx: g++-12
          install_extra: g++-12
          cuda_url: https://developer.download.nvidia.com/compute/cuda/12.3.1/local_installers/cuda_12.3.1_545.23.08_linux.run
        - name: build-ubuntu-gcc13-asan
          cxx: g++-13
          add_toolchain_repo: true
          asan: 'ON'
          install_extra: g++-13
        - name: build-ubuntu-clang12
          cxx: clang++-12
          install_extra: clang-12 libomp-12-dev
        - name: build-ubuntu-clang13
          cxx: clang++-13
          install_extra: clang-13 libomp-13-dev
        - name: build-ubuntu-clang14
          cxx: clang++-14
          install_extra: clang-14 libomp-14-dev
        - name: build-ubuntu-clang15
          cxx: clang++-15
          install_extra: clang-15 libomp-15-dev
          add_llvm_repo: true
        - name: build-ubuntu-clang16
          cxx: clang++-16
          install_extra: clang-16 libomp-16-dev
          add_llvm_repo: true
        - name: build-ubuntu-clang16-cuda12.1
          cxx: clang++-16
          cudacxx: clang++-16
          install_extra: clang-16 libomp-16-dev
          cuda_url: https://developer.download.nvidia.com/compute/cuda/12.1.1/local_installers/cuda_12.1.1_530.30.02_linux.run
          add_llvm_repo: true
          cxx_std: 20
        - name: build-ubuntu-clang17-asan
          cxx: clang++-17
          install_extra: clang-17 libomp-17-dev
          add_llvm_repo: true
          asan: 'ON'
        - name: build-ubuntu-icpx
          cxx: icpx
          add_oneapi_repo: true
          install_extra: intel-oneapi-compiler-dpcpp-cpp g++-12 # for libstdc++
          cxx_flags: --gcc-install-dir=/usr/lib/gcc/x86_64-linux-gnu/12
        - name: build-ubuntu-nvc++
          cxx: /opt/nvidia/hpc_sdk/Linux_x86_64/23.5/compilers/bin/nvc++
          add_nvcpp_repo: true
          install_extra: nvhpc-23-5
  steps:
    - uses: actions/checkout@v3
    - run: lscpu
    - name: add ubuntu toolchain repo
      if: matrix.add_toolchain_repo
      run: |
        sudo add-apt-repository ppa:ubuntu-toolchain-r/ppa
        sudo apt update
    - name: add LLVM apt repo
      if: matrix.add_llvm_repo
      run: |
        wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key|sudo apt-key add -
        sudo add-apt-repository 'deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-12 main'
        sudo add-apt-repository 'deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-13 main'
        sudo add-apt-repository 'deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-14 main'
        sudo add-apt-repository 'deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-15 main'
        sudo add-apt-repository 'deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-16 main'
        sudo add-apt-repository 'deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-17 main'
    # Registers the NVIDIA HPC SDK repository (source of the nvhpc-* packages / nvc++),
    # not the CUDA toolkit repository; the step name says so.
    - name: add NVHPC apt repo
      if: matrix.add_nvcpp_repo
      run: |
        echo 'deb [trusted=yes] https://developer.download.nvidia.com/hpc-sdk/ubuntu/amd64 /' | sudo tee /etc/apt/sources.list.d/nvhpc.list
        sudo apt update
    - name: install OneAPI
      if: matrix.add_oneapi_repo
      run: |
        # See: https://www.intel.com/content/www/us/en/docs/oneapi/installation-guide-linux/2023-2/apt.html
        wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
        | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
        echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" \
        | sudo tee /etc/apt/sources.list.d/oneAPI.list
        sudo apt update
    - name: install extras
      if: ${{ matrix.install_extra }}
      run: |
        sudo apt install ${{ matrix.install_extra }}
    - name: remove g++-13 to fix old clang builds
      if: ${{ matrix.cxx == 'clang++-11' || matrix.cxx == 'clang++-12' || matrix.cxx == 'clang++-13' || matrix.cxx == 'clang++-14' || matrix.cxx == 'clang++-15' || matrix.cudacxx == 'clang++-16' }}
      run: |
        sudo apt remove gcc-13 g++-13 libstdc++-13-dev gcc g++ libstdc++-dev
        sudo apt autoremove
        sudo apt install g++-12
    - name: download CUDA
      if: matrix.cuda_url
      run: |
        wget --no-verbose -O cuda_installer.run ${{ matrix.cuda_url }}
    - name: install CUDA
      if: matrix.cuda_url
      run: |
        sudo sh cuda_installer.run --silent --toolkit --override
    # Configure step.
    # * `[ ${{ matrix.<flag> }} ]` expands to `[ true ]` when the flag is set and to `[ ]`
    #   (false) when it is not.
    # * CXX_FLAGS is assigned from a quoted expansion so flag values containing spaces
    #   are not word-split by the shell.
    # * A failing configure is no longer swallowed: its exit status is kept, the vcpkg
    #   compiler-detection logs are still printed for diagnosis, and the step then exits
    #   with that status, so the failure is reported at this step.
    - name: cmake
      run: |
        if [ ${{ matrix.add_oneapi_repo }} ]; then source /opt/intel/oneapi/setvars.sh; fi
        mkdir build
        cd build
        # try to find nvcc if no CUDACXX is provided
        if [ -z "$CUDACXX" ]; then
          CUDACXX=(`echo /usr/local/cuda-*/bin/nvcc`)
          if [ ! -f $CUDACXX ]; then
            unset CUDACXX
          fi
        fi
        echo "CUDACXX is here: $CUDACXX"
        CXX_FLAGS="${{ matrix.cxx_flags }}"
        if [ ${{ matrix.add_nvcpp_repo }} ]; then
          # cmake (in some versions) passes some flags that nvc++ does not understand
          CXX_FLAGS+=" -noswitcherror"
        fi
        $CXX -v
        configure_status=0
        cmake .. -DBUILD_TESTING=ON \
                 -DLLAMA_BUILD_EXAMPLES=ON \
                 -DCMAKE_BUILD_TYPE=$CONFIG \
                 -DLLAMA_ENABLE_ASAN_FOR_TESTS=${{ matrix.asan || 'OFF' }} \
                 -Dalpaka_ACC_CPU_B_SEQ_T_SEQ_ENABLE=${{ !matrix.cuda_url }} \
                 -Dalpaka_ACC_CPU_DISABLE_ATOMIC_REF=ON \
                 -Dalpaka_ACC_GPU_CUDA_ENABLE=${{ !!matrix.cuda_url }} \
                 -Dalpaka_CXX_STANDARD=${{ matrix.cxx_std || '17' }} \
                 -DCMAKE_CUDA_COMPILER=$CUDACXX \
                 -DCMAKE_CUDA_HOST_COMPILER=$CXX \
                 -DCMAKE_CXX_FLAGS="$CXX_FLAGS" \
                 -DCMAKE_CUDA_FLAGS="${{ matrix.cuda_flags }}" \
                 -DCMAKE_TOOLCHAIN_FILE=$VCPKG_INSTALLATION_ROOT/scripts/buildsystems/vcpkg.cmake || configure_status=$?
        cat /usr/local/share/vcpkg/buildtrees/detect_compiler/config-x64-linux-rel-out.log || true
        cat /usr/local/share/vcpkg/buildtrees/detect_compiler/config-x64-linux-rel-err.log || true
        exit "$configure_status"
    - name: build tests + examples
      run: |
        if [ ${{ matrix.add_oneapi_repo }} ]; then source /opt/intel/oneapi/setvars.sh; fi
        cmake --build build -j $THREADS
    - name: run tests
      run: |
        if [ ${{ matrix.add_oneapi_repo }} ]; then source /opt/intel/oneapi/setvars.sh; fi
        build/tests
# Job: build and run tests + examples on Windows (Visual Studio 2022).
# No CMAKE_BUILD_TYPE is passed at configure time; the configuration is selected at
# build time with `--config $CONFIG`, and the test binary is run from build/$CONFIG.
build-windows:
  name: ${{ matrix.name }}
  needs: clang-format
  strategy:
    fail-fast: false
    matrix:
      include:
        - name: build-windows-VS2022
          runs-on: windows-2022
  runs-on: ${{ matrix.runs-on }}
  env:
    VCPKG_DEFAULT_TRIPLET: x64-windows
  # All `run` steps of this job use bash.
  defaults:
    run:
      shell: bash
  steps:
    - uses: actions/checkout@v3

    - name: cmake
      run: |
        mkdir build
        cd build
        cmake .. -DBUILD_TESTING=ON \
                 -DLLAMA_BUILD_EXAMPLES=ON \
                 -Dalpaka_ACC_CPU_B_SEQ_T_SEQ_ENABLE=ON \
                 -DCMAKE_TOOLCHAIN_FILE="$VCPKG_INSTALLATION_ROOT/scripts/buildsystems/vcpkg.cmake"

    - name: build tests + examples
      run: cmake --build build -j $THREADS --config $CONFIG

    - name: run tests
      run: |
        build/$CONFIG/tests
# Job: build and run tests + examples on macOS with the Homebrew LLVM clang++.
build-macos:
  name: build-${{ matrix.os }}
  needs: clang-format
  strategy:
    fail-fast: false
    matrix:
      include:
        - os: macos-12
        - os: macos-13
  runs-on: ${{ matrix.os }}
  steps:
    - uses: actions/checkout@v3

    # Install the toolchain and log where the Homebrew clang++ lives.
    - name: brew install dependencies
      run: |
        brew install llvm libomp pkg-config
        echo "CXX is here: $(brew --prefix llvm)/bin/clang++"

    # CXX is exported inside this step only, so it applies to the configure run below.
    - name: cmake
      run: |
        mkdir build
        cd build
        export CXX="$(brew --prefix llvm)/bin/clang++"
        cmake .. -DBUILD_TESTING=ON \
                 -DLLAMA_BUILD_EXAMPLES=ON \
                 -DCMAKE_BUILD_TYPE=$CONFIG \
                 -Dalpaka_ACC_CPU_B_SEQ_T_SEQ_ENABLE=ON \
                 -Dalpaka_CXX_STANDARD=17 \
                 -DCMAKE_TOOLCHAIN_FILE=$VCPKG_INSTALLATION_ROOT/scripts/buildsystems/vcpkg.cmake

    - name: build tests + examples
      run: |
        cmake --build build -j $THREADS

    - name: run tests
      run: |
        build/tests