# Skip to content

# Improve SIMD loading/storing #3771

# Improve SIMD loading/storing

# Improve SIMD loading/storing #3771

# Workflow file for this run

# Workflow-level settings: display name, triggers, shared environment and
# concurrency policy. The job definitions follow under `jobs:`.
name: CI

# Triggered by every push, every pull request and a daily cron at 03:00.
on:
  push:
  pull_request:
  schedule:
    - cron: '0 3 * * *'

# Variables visible to all jobs: THREADS is passed to `-j`, CONFIG is the
# CMake build type / configuration name.
env:
  THREADS: 2
  CONFIG: RelWithDebInfo

# One concurrency group per pull request (or per run when there is no PR
# number); a newer run in the same group cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.run_id }}
  cancel-in-progress: true

jobs:
# Job `clang-format` (an entry of the workflow's `jobs:` map).
# Runs clang-format 16 in place over the sources, prints the resulting diff
# and commits it. The other jobs in this workflow declare `needs: clang-format`.
clang-format:
  runs-on: ubuntu-22.04
  steps:
    - uses: actions/checkout@v3

    # Format files with the listed extensions in place; ./thirdparty is skipped.
    - uses: DoozyX/clang-format-lint-action@v0.16.2
      with:
        clangFormatVersion: 16
        extensions: 'cpp,hpp,h,cu'
        exclude: './thirdparty'
        inplace: True

    # Show what the formatter changed (if anything).
    - name: git diff
      run: git diff

    # Commit the changes with the message 'Run clang-format'.
    - uses: EndBug/add-and-commit@v9
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      with:
        message: 'Run clang-format'
        author_name: Third Party
        author_email: llama@hzdr.de
# Job `clang-tidy` (an entry of the workflow's `jobs:` map).
# Configures the project with clang++-16, exports compile_commands.json and
# runs run-clang-tidy-16 once per matrix entry. Starts after `clang-format`.
clang-tidy:
  needs: clang-format
  runs-on: ubuntu-22.04
  env:
    CXX: clang++-16
  strategy:
    fail-fast: false
    matrix:
      # `files` is the file pattern given to run-clang-tidy-16 as its last
      # argument; paths are relative to the build/ directory.
      include:
        - files: '../tests/m.*'
        - files: '../tests/[^m].*'
        - files: '../examples'
  steps:
    - uses: actions/checkout@v3
    - name: add LLVM apt repo
      run: |
        wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key|sudo apt-key add -
        sudo add-apt-repository 'deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-16 main'
    - name: install clang-16
      run: |
        sudo apt install clang-16 libomp-16-dev clang-tidy-16
    - name: cmake
      # CMAKE_BUILD_TYPE is passed exactly once (it used to be given twice
      # with the same value).
      run: |
        mkdir build
        cd build
        cmake .. -DCMAKE_BUILD_TYPE=$CONFIG \
                 -DBUILD_TESTING=ON \
                 -DLLAMA_BUILD_EXAMPLES=ON \
                 -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
                 -Dalpaka_ACC_CPU_B_SEQ_T_SEQ_ENABLE=ON \
                 -Dalpaka_ACC_CPU_DISABLE_ATOMIC_REF=ON \
                 -Dalpaka_CXX_STANDARD=17 \
                 -DCMAKE_TOOLCHAIN_FILE=$VCPKG_INSTALLATION_ROOT/scripts/buildsystems/vcpkg.cmake
    - name: run clang-tidy
      # The file pattern is single-quoted so the shell hands it to
      # run-clang-tidy-16 verbatim instead of attempting pathname expansion
      # on `[^m]` / `*`.
      run: |
        cd build
        sed -i 's/\(-forward-unknown-to-host-compiler\|--generate-code=arch=[^ ]\+\|--expt-extended-lambda\|--extended-lambda\|--expt-relaxed-constexpr\|--use_fast_math\)//g' compile_commands.json # remove NVCC specific flags which clang cannot handle
        run-clang-tidy-16 -j $THREADS -header-filter='(tests|include/llama|examples)' -extra-arg=--no-cuda-version-check -extra-arg=-nocudalib -extra-arg=-Wno-unused-command-line-argument '${{ matrix.files }}'
# Job `coverage` (an entry of the workflow's `jobs:` map).
# Builds the tests with g++ in Debug mode with coverage instrumentation,
# runs them, captures an lcov report and uploads it to Codecov.
# Starts after `clang-format`.
coverage:
  needs: clang-format
  runs-on: ubuntu-22.04
  env:
    CXX: g++
  steps:
    - uses: actions/checkout@v3
      with:
        fetch-depth: 2
    - name: install lcov
      run: |
        sudo apt install lcov
    - name: cmake
      # The vcpkg toolchain file is located through $VCPKG_INSTALLATION_ROOT,
      # the same way the other jobs of this workflow do it, instead of a
      # hard-coded /usr/local/share/vcpkg prefix.
      run: |
        mkdir build
        cd build
        cmake .. -DCMAKE_BUILD_TYPE=Debug \
                 -DBUILD_TESTING=ON \
                 -DLLAMA_BUILD_EXAMPLES=OFF \
                 -DLLAMA_ENABLE_COVERAGE_FOR_TESTS=ON \
                 -DCMAKE_TOOLCHAIN_FILE=$VCPKG_INSTALLATION_ROOT/scripts/buildsystems/vcpkg.cmake
    - name: build tests
      run: |
        cmake --build build -j $THREADS
    - name: run tests
      run: |
        build/tests
    - name: generate coverage report
      run: |
        lcov --capture --directory build --output-file coverage.info
        #lcov --remove coverage.info '/usr/*' --output-file coverage.info
        #lcov --list coverage.info
    - name: upload coverage report
      uses: codecov/codecov-action@v3
      with:
        token: ${{ secrets.CODECOV_TOKEN }}
        # A failed upload fails the job.
        fail_ci_if_error: true
        verbose: true
# Job `amalgamation` (an entry of the workflow's `jobs:` map).
# Generates the single-header llama.hpp, compiles the heatequation example
# against it as a smoke test and publishes the header as a build artifact.
# Starts after `clang-format`.
amalgamation:
  needs: clang-format
  runs-on: ubuntu-22.04
  env:
    CXX: g++
  steps:
    - uses: actions/checkout@v3
    - name: create llama.hpp
      run: ./tools/create-single-header.sh
    - run: vcpkg install # installs manifest in a folder called `vcpkg_installed` next to the manifest
    - name: test llama.hpp
      # The header is copied to build/llama/ and found through `-I.`.
      run: |
        mkdir build
        cd build
        mkdir llama
        cp -p ../single-header/llama.hpp llama
        $CXX -std=c++20 -I../vcpkg_installed/x64-linux/include -I. ../examples/heatequation/heatequation.cpp
    - name: upload llama.hpp
      # v4 replaces the deprecated v3 of the artifact actions, which
      # github.com no longer accepts; the inputs used here are unchanged.
      # NOTE(review): confirm v4 availability if this runs on GitHub
      # Enterprise Server.
      uses: actions/upload-artifact@v4
      with:
        name: llama.hpp
        path: single-header/llama.hpp
        if-no-files-found: error
# Job `build-ubuntu` (an entry of the workflow's `jobs:` map).
# Builds and runs the tests and examples on Ubuntu, once per matrix entry
# (compiler, optional CUDA toolkit, optional AddressSanitizer).
# Starts after `clang-format`.
#
# Matrix keys read by the steps below:
#   name               job display name
#   cxx                host C++ compiler, exported as CXX
#   cudacxx            CUDA compiler, exported as CUDACXX (nvcc is searched
#                      for under /usr/local/cuda-* when empty)
#   cuda_url           CUDA installer to download; its presence also enables
#                      the alpaka CUDA backend and disables the serial one
#   asan               value for LLAMA_ENABLE_ASAN_FOR_TESTS (default OFF)
#   cxx_std            value for alpaka_CXX_STANDARD (default 17)
#   cxx_flags          value for CMAKE_CXX_FLAGS
#   install_extra      extra apt packages to install
#   add_toolchain_repo / add_llvm_repo / add_oneapi_repo / add_nvcpp_repo
#                      enable the corresponding apt repository step
build-ubuntu:
  needs: clang-format
  runs-on: ${{ matrix.os || 'ubuntu-22.04' }}
  env:
    CXX: ${{ matrix.cxx }}
    CUDACXX: ${{ matrix.cudacxx }}
  name: ${{ matrix.name }}
  strategy:
    fail-fast: false
    matrix:
      # `asan` values are quoted so they are always the string 'ON' and never
      # read as a YAML boolean.
      include:
        - name: build-ubuntu-gcc9
          cxx: g++-9
        - name: build-ubuntu-gcc10
          cxx: g++-10
        - name: build-ubuntu-gcc10-nvcc11.6
          cxx: g++-10
          cuda_url: https://developer.download.nvidia.com/compute/cuda/11.6.2/local_installers/cuda_11.6.2_510.47.03_linux.run
        - name: build-ubuntu-gcc11-asan
          cxx: g++-11
          asan: 'ON'
        - name: build-ubuntu-gcc11-nvcc11.7
          cxx: g++-11
          cuda_url: https://developer.download.nvidia.com/compute/cuda/11.7.1/local_installers/cuda_11.7.1_515.65.01_linux.run
        - name: build-ubuntu-gcc11-nvcc11.8
          cxx: g++-11
          cuda_url: https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run
        - name: build-ubuntu-gcc12
          cxx: g++-12
          install_extra: g++-12
        - name: build-ubuntu-gcc12-nvcc12.0
          cxx: g++-12
          install_extra: g++-12
          cuda_url: https://developer.download.nvidia.com/compute/cuda/12.0.1/local_installers/cuda_12.0.1_525.85.12_linux.run
        - name: build-ubuntu-gcc12-nvcc12.1
          cxx: g++-12
          install_extra: g++-12
          cuda_url: https://developer.download.nvidia.com/compute/cuda/12.1.1/local_installers/cuda_12.1.1_530.30.02_linux.run
        - name: build-ubuntu-gcc12-nvcc12.2-asan
          cxx: g++-12
          install_extra: g++-12
          cuda_url: https://developer.download.nvidia.com/compute/cuda/12.2.2/local_installers/cuda_12.2.2_535.104.05_linux.run
          asan: 'ON'
        - name: build-ubuntu-gcc12-nvcc12.3
          cxx: g++-12
          install_extra: g++-12
          cuda_url: https://developer.download.nvidia.com/compute/cuda/12.3.1/local_installers/cuda_12.3.1_545.23.08_linux.run
        - name: build-ubuntu-gcc13-asan
          cxx: g++-13
          add_toolchain_repo: true
          asan: 'ON'
          install_extra: g++-13
        - name: build-ubuntu-clang12
          cxx: clang++-12
          install_extra: clang-12 libomp-12-dev
        - name: build-ubuntu-clang13
          cxx: clang++-13
          install_extra: clang-13 libomp-13-dev
        - name: build-ubuntu-clang14
          cxx: clang++-14
          install_extra: clang-14 libomp-14-dev
        - name: build-ubuntu-clang15
          cxx: clang++-15
          install_extra: clang-15 libomp-15-dev
          add_llvm_repo: true
        - name: build-ubuntu-clang16
          cxx: clang++-16
          install_extra: clang-16 libomp-16-dev
          add_llvm_repo: true
        - name: build-ubuntu-clang16-cuda12.1
          cxx: clang++-16
          cudacxx: clang++-16
          install_extra: clang-16 libomp-16-dev
          cuda_url: https://developer.download.nvidia.com/compute/cuda/12.1.1/local_installers/cuda_12.1.1_530.30.02_linux.run
          add_llvm_repo: true
          cxx_std: 20
        - name: build-ubuntu-clang17-asan
          cxx: clang++-17
          install_extra: clang-17 libomp-17-dev
          add_llvm_repo: true
          asan: 'ON'
        - name: build-ubuntu-icpx
          cxx: icpx
          add_oneapi_repo: true
          install_extra: intel-oneapi-compiler-dpcpp-cpp g++-12 # for libstdc++
          cxx_flags: --gcc-install-dir=/usr/lib/gcc/x86_64-linux-gnu/12
        - name: build-ubuntu-nvc++
          cxx: /opt/nvidia/hpc_sdk/Linux_x86_64/23.5/compilers/bin/nvc++
          add_nvcpp_repo: true
          install_extra: nvhpc-23-5
  steps:
    - uses: actions/checkout@v3
    - run: lscpu
    - name: add ubuntu toolchain repo
      if: matrix.add_toolchain_repo
      run: |
        sudo add-apt-repository ppa:ubuntu-toolchain-r/ppa
        sudo apt update
    - name: add LLVM apt repo
      if: matrix.add_llvm_repo
      run: |
        wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key|sudo apt-key add -
        sudo add-apt-repository 'deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-12 main'
        sudo add-apt-repository 'deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-13 main'
        sudo add-apt-repository 'deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-14 main'
        sudo add-apt-repository 'deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-15 main'
        sudo add-apt-repository 'deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-16 main'
        sudo add-apt-repository 'deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-17 main'
    # This step registers the NVIDIA HPC SDK repository (source of the
    # nvhpc-* packages), not a CUDA repository; the name now says so.
    - name: add NVIDIA HPC SDK apt repo
      if: matrix.add_nvcpp_repo
      run: |
        echo 'deb [trusted=yes] https://developer.download.nvidia.com/hpc-sdk/ubuntu/amd64 /' | sudo tee /etc/apt/sources.list.d/nvhpc.list
        sudo apt update
    # Only the repository is registered here; the compiler package itself is
    # installed by the `install extras` step.
    - name: add oneAPI apt repo
      if: matrix.add_oneapi_repo
      run: |
        # See: https://www.intel.com/content/www/us/en/docs/oneapi/installation-guide-linux/2023-2/apt.html
        wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
        | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
        echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" \
        | sudo tee /etc/apt/sources.list.d/oneAPI.list
        sudo apt update
    - name: install extras
      if: ${{ matrix.install_extra }}
      run: |
        sudo apt install ${{ matrix.install_extra }}
    - name: remove g++-13 to fix old clang builds
      if: ${{ matrix.cxx == 'clang++-11' || matrix.cxx == 'clang++-12' || matrix.cxx == 'clang++-13' || matrix.cxx == 'clang++-14' || matrix.cxx == 'clang++-15' || matrix.cudacxx == 'clang++-16' }}
      run: |
        sudo apt remove gcc-13 g++-13 libstdc++-13-dev gcc g++ libstdc++-dev
        sudo apt autoremove
        sudo apt install g++-12
    - name: download CUDA
      if: matrix.cuda_url
      run: |
        wget --no-verbose -O cuda_installer.run ${{ matrix.cuda_url }}
    - name: install CUDA
      if: matrix.cuda_url
      run: |
        sudo sh cuda_installer.run --silent --toolkit --override
    - name: cmake
      run: |
        # `[ ... ]` receives `true` when the matrix flag is set and nothing otherwise
        if [ ${{ matrix.add_oneapi_repo }} ]; then source /opt/intel/oneapi/setvars.sh; fi
        mkdir build
        cd build
        # try to find nvcc if no CUDACXX is provided
        if [ -z "$CUDACXX" ]; then
          CUDACXX=(`echo /usr/local/cuda-*/bin/nvcc`)
          if [ ! -f $CUDACXX ]; then
            unset CUDACXX
          fi
        fi
        echo "CUDACXX is here: $CUDACXX"
        # quoted so that flags containing spaces stay one assignment
        CXX_FLAGS="${{ matrix.cxx_flags }}"
        if [ ${{ matrix.add_nvcpp_repo }} ]; then
          # cmake (in some versions) passes some flags that nvc++ does not understand
          CXX_FLAGS+=" -noswitcherror"
        fi
        $CXX -v
        # Remember the configure result instead of discarding it, so the vcpkg
        # logs below are still printed and the step then fails where the
        # problem occurred.
        cmake_status=0
        cmake .. -DBUILD_TESTING=ON \
                 -DLLAMA_BUILD_EXAMPLES=ON \
                 -DCMAKE_BUILD_TYPE=$CONFIG \
                 -DLLAMA_ENABLE_ASAN_FOR_TESTS=${{ matrix.asan || 'OFF' }} \
                 -Dalpaka_ACC_CPU_B_SEQ_T_SEQ_ENABLE=${{ !matrix.cuda_url }} \
                 -Dalpaka_ACC_CPU_DISABLE_ATOMIC_REF=ON \
                 -Dalpaka_ACC_GPU_CUDA_ENABLE=${{ !!matrix.cuda_url }} \
                 -Dalpaka_CXX_STANDARD=${{ matrix.cxx_std || '17' }} \
                 -DCMAKE_CUDA_COMPILER=$CUDACXX \
                 -DCMAKE_CUDA_HOST_COMPILER=$CXX \
                 -DCMAKE_CXX_FLAGS="$CXX_FLAGS" \
                 -DCMAKE_CUDA_FLAGS="${{ matrix.cuda_flags }}" \
                 -DCMAKE_TOOLCHAIN_FILE=$VCPKG_INSTALLATION_ROOT/scripts/buildsystems/vcpkg.cmake || cmake_status=$?
        # vcpkg compiler-detection logs; they may not exist, hence `|| true`
        cat /usr/local/share/vcpkg/buildtrees/detect_compiler/config-x64-linux-rel-out.log || true
        cat /usr/local/share/vcpkg/buildtrees/detect_compiler/config-x64-linux-rel-err.log || true
        exit $cmake_status
    - name: build tests + examples
      run: |
        if [ ${{ matrix.add_oneapi_repo }} ]; then source /opt/intel/oneapi/setvars.sh; fi
        cmake --build build -j $THREADS
    - name: run tests
      run: |
        if [ ${{ matrix.add_oneapi_repo }} ]; then source /opt/intel/oneapi/setvars.sh; fi
        build/tests
# Job `build-windows` (an entry of the workflow's `jobs:` map).
# Configures, builds and runs the tests and examples on Windows, using bash
# as the shell for every `run` step. Starts after `clang-format`.
build-windows:
  name: ${{ matrix.name }}
  needs: clang-format
  runs-on: ${{ matrix.runs-on }}
  defaults:
    run:
      shell: bash
  env:
    VCPKG_DEFAULT_TRIPLET: x64-windows
  strategy:
    fail-fast: false
    matrix:
      include:
        - name: build-windows-VS2022
          runs-on: windows-2022
  steps:
    - uses: actions/checkout@v3

    # No CMAKE_BUILD_TYPE here; the configuration is selected with
    # `--config $CONFIG` at build time and appears in the test binary's path.
    - name: cmake
      run: |
        mkdir build
        cd build
        cmake .. -DBUILD_TESTING=ON \
                 -DLLAMA_BUILD_EXAMPLES=ON \
                 -Dalpaka_ACC_CPU_B_SEQ_T_SEQ_ENABLE=ON \
                 -DCMAKE_TOOLCHAIN_FILE="$VCPKG_INSTALLATION_ROOT/scripts/buildsystems/vcpkg.cmake"

    - name: build tests + examples
      run: |
        cmake --build build -j $THREADS --config $CONFIG

    - name: run tests
      run: build/$CONFIG/tests
# Job `build-macos` (an entry of the workflow's `jobs:` map).
# Builds and runs the tests and examples with Homebrew's LLVM clang++ on each
# listed macOS runner image. Starts after `clang-format`.
build-macos:
  name: build-${{ matrix.os }}
  needs: clang-format
  runs-on: ${{ matrix.os }}
  strategy:
    fail-fast: false
    matrix:
      # NOTE(review): confirm that both runner images are still offered.
      include:
        - os: macos-12
        - os: macos-13
  steps:
    - uses: actions/checkout@v3

    - name: brew install dependencies
      run: |
        brew install llvm libomp pkg-config
        echo "CXX is here: $(brew --prefix llvm)/bin/clang++"

    # CXX is exported inside this step only, pointing at Homebrew's clang++.
    - name: cmake
      run: |
        mkdir build
        cd build
        export CXX="$(brew --prefix llvm)/bin/clang++"
        cmake .. -DBUILD_TESTING=ON \
                 -DLLAMA_BUILD_EXAMPLES=ON \
                 -DCMAKE_BUILD_TYPE=$CONFIG \
                 -Dalpaka_ACC_CPU_B_SEQ_T_SEQ_ENABLE=ON \
                 -Dalpaka_CXX_STANDARD=17 \
                 -DCMAKE_TOOLCHAIN_FILE=$VCPKG_INSTALLATION_ROOT/scripts/buildsystems/vcpkg.cmake

    - name: build tests + examples
      run: cmake --build build -j $THREADS

    - name: run tests
      run: build/tests