chore: bump llama.cpp #132
name: ci
permissions:
  contents: read
  pull-requests: read
  actions: read
env:
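  # VERSION is consumed only by the release job's tag_name below; on tag pushes
  # github.ref_name is the tag itself (e.g. v0.0.1), on branches/PRs it is unused.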
  VERSION: "${{ github.ref_name }}"
on:
  workflow_dispatch: { }
  push:
    tags:
      - "v*.*.*"
    branches:
      - main
    paths-ignore:
      - "docs/**"
      - "**.md"
      - "**.mdx"
      - "**.png"
      - "**.jpg"
  pull_request:
    branches:
      - main
    paths-ignore:
      - "docs/**"
      - "**.md"
      - "**.mdx"
      - "**.png"
      - "**.jpg"
concurrency:
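  # github.head_ref is only set for pull_request events, so PR runs group by ref
  # and stale runs get cancelled; pushes fall back to github.run_id, which is
  # unique, so push runs never cancel each other.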
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true
jobs:
  darwin-metal:
    strategy:
      fail-fast: false
      matrix:
        arch: [ amd64, arm64 ]
        version: [ '3.0' ]
    # see https://github.com/actions/runner-images?tab=readme-ov-file#available-images,
    # https://support.apple.com/en-us/102894.
    runs-on: ${{ matrix.arch == 'amd64' && 'macos-13' || 'macos-14' }}
    steps:
      - name: Checkout
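        # full-depth clone plus submodules: llama.cpp is vendored as a submodule,
        # and the full history is assumed to be needed for version stamping.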
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'recursive'
      - name: Setup Cache
        timeout-minutes: 5
        uses: actions/cache@v3
        with:
          key: cache-darwin-metal-${{ matrix.arch }}-${{ matrix.version }}
          path: |
            ${{ github.workspace }}/.cache
      - name: Deps
        run: |
          brew update && brew install ccache
      - name: Setup XCode
        uses: maxim-lobanov/setup-xcode@v1
        with:
          xcode-version: '15.2'
      - name: Build
        # disable OpenMP,
        # see https://github.com/ggerganov/llama.cpp/issues/7743#issuecomment-2148342691,
        # https://github.com/ggerganov/llama.cpp/issues/7719#issuecomment-2147631216.
        env:
          CCACHE_DIR: "${{ github.workspace }}/.cache/ccache"
        run: |
          echo "===== BUILD ====="
          mkdir -p ${{ github.workspace }}/.cache
          cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \
            -DGGML_ACCELERATE=on -DGGML_METAL=on -DGGML_METAL_EMBED_LIBRARY=on \
            ${{ matrix.arch == 'amd64' && '-DGGML_NATIVE=off' || '-DGGML_NATIVE=on' }} \
            -DGGML_OPENMP=off
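          # assumption: nproc is provided by the GNU coreutils preinstalled on the
          # hosted macOS images; a portable fallback would be, e.g.:
          #   JOBS=$(nproc 2>/dev/null || sysctl -n hw.logicalcpu)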
          cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(nproc)
          echo "===== RESULT ====="
          if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then
            otool -L ${{ github.workspace }}/build/bin/llama-box
          else
            exit 1
          fi
          echo "===== PACKAGE ====="
          mkdir -p ${{ github.workspace }}/out
          zip -j ${{ github.workspace }}/out/llama-box-darwin-${{ matrix.arch }}-metal.zip ${{ github.workspace }}/build/bin/llama-box
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}/out/*.zip
          name: llama-box-darwin-${{ matrix.arch }}-metal-${{ matrix.version }}
  linux-hip:
    strategy:
      fail-fast: false
      matrix:
        # see https://hub.docker.com/r/rocm/dev-ubuntu-22.04/tags.
        # 6.1 ==> 6.1.2
        # 5.7 ==> 5.7.1
        # build fat binary,
        # see https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878,
        # https://llvm.org/docs/AMDGPUUsage.html.
        # official gpu support list,
        # see https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.1.2/reference/system-requirements.html,
        # https://rocm.docs.amd.com/en/docs-5.7.1/release/gpu_os_support.html.
        arch: [ amd64 ]
        version: [ '6.1', '5.7' ]
        size: [ 's', 'l' ]
        exclude:
          - size: 'l'
            version: '5.7'
        include:
          - size: 's'
            hip_arch: 'gfx1030;gfx1100;gfx1101;gfx1102'
          - size: 'l'
            hip_arch: 'gfx900;gfx906;gfx908;gfx90a;gfx940;gfx1030;gfx1100;gfx1101;gfx1102'
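    # the size split is a trade-off (assumption): 's' targets recent RDNA2/RDNA3
    # consumer GPUs only, while 'l' adds the older GCN/CDNA targets
    # (gfx900..gfx940) at the cost of a noticeably larger fat binary.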
    runs-on: ubuntu-22.04
    steps:
      - name: Maximize Space
        # see https://github.com/easimon/maximize-build-space/blob/master/action.yml.
        run: |
          sudo rm -rf /usr/share/dotnet
          sudo rm -rf /usr/local/lib/android
          sudo rm -rf /opt/ghc
          sudo rm -rf /opt/hostedtoolcache/CodeQL
          sudo docker image prune --all --force
      - name: Clone
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'recursive'
      - name: Setup Cache
        timeout-minutes: 5
        uses: actions/cache@v3
        with:
          key: cache-linux-hip-${{ matrix.arch }}-${{ matrix.version }}-${{ matrix.size }}
          path: |
            ${{ github.workspace }}/.cache
      - name: Setup QEMU
        if: ${{ matrix.arch == 'arm64' }}
        uses: docker/setup-qemu-action@v3
        with:
          image: tonistiigi/binfmt:qemu-v7.0.0
          platforms: "arm64"
      - name: Build
        # disable OpenMP,
        # see https://github.com/ggerganov/llama.cpp/issues/7743#issuecomment-2148342691,
        # https://github.com/ggerganov/llama.cpp/issues/7719#issuecomment-2147631216.
        env:
          CCACHE_DIR: "${{ github.workspace }}/.cache/ccache"
          AMDGPU_TARGETS: "${{ matrix.hip_arch }}"
        run: |
          echo "===== SCRIPT ====="
          cat <<EOF > /tmp/entrypoint.sh
          #!/bin/bash
          apt-get update && apt-get install -y build-essential git cmake ccache
          git config --system --add safe.directory '*'
          mkdir -p ${{ github.workspace }}/.cache
          cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \
            -DGGML_HIPBLAS=on -DAMDGPU_TARGETS="${AMDGPU_TARGETS}" \
            ${{ matrix.arch == 'amd64' && '-DGGML_NATIVE=off' || '-DGGML_NATIVE=on' }} \
            -DGGML_OPENMP=off
          cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(nproc)
          echo "===== RESULT ====="
          if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then
            ldd ${{ github.workspace }}/build/bin/llama-box
          else
            exit 1
          fi
          EOF
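          # note: the heredoc delimiter above is unquoted, so ${AMDGPU_TARGETS} and
          # $(nproc) were already expanded by the runner shell when the script was
          # written; the --env flags below just keep the container view consistent.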
          chmod +x /tmp/entrypoint.sh
          cat /tmp/entrypoint.sh
          echo "===== BUILD ====="
          docker run \
            --rm \
            --privileged \
            --platform linux/${{ matrix.arch }} \
            --volume ${{ github.workspace }}:${{ github.workspace }} \
            --workdir ${{ github.workspace }} \
            --env CC=/opt/rocm/llvm/bin/clang \
            --env CXX=/opt/rocm/llvm/bin/clang++ \
            --env CCACHE_DIR \
            --env AMDGPU_TARGETS \
            --volume /tmp/entrypoint.sh:/entrypoint.sh \
            --entrypoint /entrypoint.sh \
            rocm/dev-ubuntu-22.04:${{ matrix.version == '6.1' && '6.1.2' || '5.7.1' }}-complete
          echo "===== PACKAGE ====="
          mkdir -p ${{ github.workspace }}/out
          zip -j ${{ github.workspace }}/out/llama-box-linux-${{ matrix.arch }}-hip-${{ matrix.version }}-${{ matrix.size }}.zip ${{ github.workspace }}/build/bin/*
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}/out/*.zip
          name: llama-box-linux-${{ matrix.arch }}-hip-${{ matrix.version }}-${{ matrix.size }}
  linux-cuda:
    strategy:
      fail-fast: false
      matrix:
        # see https://hub.docker.com/r/nvidia/cuda/tags?page=&page_size=&ordering=&name=devel.
        # 12.5 ==> 12.5.0
        # 11.8 ==> 11.8.0
        # build fat binary,
        # see https://developer.nvidia.com/cuda-gpus.
        arch: [ amd64 ]
        version: [ '12.5', '11.8' ]
        size: [ 's', 'l' ]
        exclude:
          - size: 'l'
            version: '11.8'
        include:
          - size: 's'
            cuda_arch: '80-real;86-real;89'
          - size: 'l'
            cuda_arch: '60-real;61-real;70-real;75-real;80-real;86-real;89'
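    # CMAKE_CUDA_ARCHITECTURES syntax: 'NN-real' generates SASS for that arch
    # only, while the bare trailing '89' also embeds PTX so newer GPUs can JIT it.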
    runs-on: ubuntu-22.04
    steps:
      - name: Maximize Space
        # see https://github.com/easimon/maximize-build-space/blob/master/action.yml.
        run: |
          sudo rm -rf /usr/share/dotnet
          sudo rm -rf /usr/local/lib/android
          sudo rm -rf /opt/ghc
          sudo rm -rf /opt/hostedtoolcache/CodeQL
          sudo docker image prune --all --force
      - name: Clone
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'recursive'
      - name: Setup Cache
        timeout-minutes: 5
        uses: actions/cache@v3
        with:
          key: cache-linux-cuda-${{ matrix.arch }}-${{ matrix.version }}-${{ matrix.size }}
          path: |
            ${{ github.workspace }}/.cache
      - name: Setup QEMU
        if: ${{ matrix.arch == 'arm64' }}
        uses: docker/setup-qemu-action@v3
        with:
          image: tonistiigi/binfmt:qemu-v7.0.0
          platforms: "arm64"
      - name: Build
        # disable OpenMP,
        # see https://github.com/ggerganov/llama.cpp/issues/7743#issuecomment-2148342691,
        # https://github.com/ggerganov/llama.cpp/issues/7719#issuecomment-2147631216.
        env:
          CCACHE_DIR: "${{ github.workspace }}/.cache/ccache"
          CUDA_ARCHITECTURES: "${{ matrix.cuda_arch }}"
        run: |
          echo "===== SCRIPT ====="
          cat <<EOF > /tmp/entrypoint.sh
          #!/bin/bash
          apt-get update && apt-get install -y build-essential git cmake ccache
          git config --system --add safe.directory '*'
          mkdir -p ${{ github.workspace }}/.cache
          cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \
            -DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES="${CUDA_ARCHITECTURES}" \
            ${{ matrix.arch == 'amd64' && '-DGGML_NATIVE=off' || '-DGGML_NATIVE=on' }} \
            -DGGML_OPENMP=off
          cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(nproc)
          echo "===== RESULT ====="
          if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then
            ldd ${{ github.workspace }}/build/bin/llama-box
          else
            exit 1
          fi
          EOF
          chmod +x /tmp/entrypoint.sh
          cat /tmp/entrypoint.sh
          echo "===== BUILD ====="
          docker run \
            --rm \
            --privileged \
            --platform linux/${{ matrix.arch }} \
            --volume ${{ github.workspace }}:${{ github.workspace }} \
            --workdir ${{ github.workspace }} \
            --env CCACHE_DIR \
            --env CUDA_ARCHITECTURES \
            --volume /tmp/entrypoint.sh:/entrypoint.sh \
            --entrypoint /entrypoint.sh \
            nvidia/cuda:${{ matrix.version == '12.5' && '12.5.0' || '11.8.0' }}-devel-ubuntu22.04
          echo "===== PACKAGE ====="
          mkdir -p ${{ github.workspace }}/out
          zip -j ${{ github.workspace }}/out/llama-box-linux-${{ matrix.arch }}-cuda-${{ matrix.version }}-${{ matrix.size }}.zip ${{ github.workspace }}/build/bin/*
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}/out/*.zip
          name: llama-box-linux-${{ matrix.arch }}-cuda-${{ matrix.version }}-${{ matrix.size }}
  linux-oneapi:
    strategy:
      fail-fast: false
      matrix:
        # see https://hub.docker.com/r/intel/oneapi-basekit/tags?page=&page_size=&ordering=&name=devel.
        # 2024.2 ==> 2024.2.0
        # 2024.1 ==> 2024.1.1
        arch: [ amd64 ]
        version: [ '2024.2', '2024.1' ]
    runs-on: ubuntu-22.04
    steps:
      - name: Maximize Space
        # see https://github.com/easimon/maximize-build-space/blob/master/action.yml.
        run: |
          sudo rm -rf /usr/share/dotnet
          sudo rm -rf /usr/local/lib/android
          sudo rm -rf /opt/ghc
          sudo rm -rf /opt/hostedtoolcache/CodeQL
          sudo docker image prune --all --force
      - name: Clone
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'recursive'
      - name: Setup Cache
        timeout-minutes: 5
        uses: actions/cache@v3
        with:
          key: cache-linux-oneapi-${{ matrix.arch }}-${{ matrix.version }}
          path: |
            ${{ github.workspace }}/.cache
      - name: Setup QEMU
        if: ${{ matrix.arch == 'arm64' }}
        uses: docker/setup-qemu-action@v3
        with:
          image: tonistiigi/binfmt:qemu-v7.0.0
          platforms: "arm64"
      - name: Build
        # disable OpenMP,
        # see https://github.com/ggerganov/llama.cpp/issues/7743#issuecomment-2148342691,
        # https://github.com/ggerganov/llama.cpp/issues/7719#issuecomment-2147631216.
        env:
          CCACHE_DIR: "${{ github.workspace }}/.cache/ccache"
        run: |
          echo "===== SCRIPT ====="
          cat <<EOF > /tmp/entrypoint.sh
          #!/bin/bash
          apt-get update && apt-get install -y build-essential git cmake ccache
          git config --system --add safe.directory '*'
          mkdir -p ${{ github.workspace }}/.cache
          cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \
            -DGGML_SYCL=on -DGGML_SYCL_F16=on \
            ${{ matrix.arch == 'amd64' && '-DGGML_NATIVE=off' || '-DGGML_NATIVE=on' }} \
            -DGGML_OPENMP=off
          cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(nproc)
          echo "===== RESULT ====="
          if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then
            ldd ${{ github.workspace }}/build/bin/llama-box
          else
            exit 1
          fi
          EOF
          chmod +x /tmp/entrypoint.sh
          cat /tmp/entrypoint.sh
          echo "===== BUILD ====="
          docker run \
            --rm \
            --privileged \
            --platform linux/${{ matrix.arch }} \
            --volume ${{ github.workspace }}:${{ github.workspace }} \
            --workdir ${{ github.workspace }} \
            --env CC=icx \
            --env CXX=icpx \
            --env CCACHE_DIR \
            --volume /tmp/entrypoint.sh:/entrypoint.sh \
            --entrypoint /entrypoint.sh \
            intel/oneapi-basekit:${{ matrix.version == '2024.2' && '2024.2.0' || '2024.1.1' }}-devel-ubuntu22.04
          echo "===== PACKAGE ====="
          mkdir -p ${{ github.workspace }}/out
          zip -j ${{ github.workspace }}/out/llama-box-linux-${{ matrix.arch }}-oneapi-${{ matrix.version }}.zip ${{ github.workspace }}/build/bin/*
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}/out/*.zip
          name: llama-box-linux-${{ matrix.arch }}-oneapi-${{ matrix.version }}
  linux-cann:
    strategy:
      fail-fast: false
      matrix:
        # see https://hub.docker.com/r/ascendai/cann/tags?page=&page_size=&ordering=&name=.
        # 8.0 ==> 8.0.RC1
        arch: [ amd64, arm64 ]
        version: [ '8.0' ]
    runs-on: ubuntu-22.04
    steps:
      - name: Maximize Space
        # see https://github.com/easimon/maximize-build-space/blob/master/action.yml.
        run: |
          sudo rm -rf /usr/share/dotnet
          sudo rm -rf /usr/local/lib/android
          sudo rm -rf /opt/ghc
          sudo rm -rf /opt/hostedtoolcache/CodeQL
          sudo docker image prune --all --force
      - name: Clone
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'recursive'
      - name: Setup Cache
        timeout-minutes: 5
        uses: actions/cache@v3
        with:
          key: cache-linux-cann-${{ matrix.arch }}-${{ matrix.version }}
          path: |
            ${{ github.workspace }}/.cache
      - name: Setup QEMU
        if: ${{ matrix.arch == 'arm64' }}
        uses: docker/setup-qemu-action@v3
        with:
          image: tonistiigi/binfmt:qemu-v7.0.0
          platforms: "arm64"
      - name: Build
        # disable OpenMP,
        # see https://github.com/ggerganov/llama.cpp/issues/7743#issuecomment-2148342691,
        # https://github.com/ggerganov/llama.cpp/issues/7719#issuecomment-2147631216.
        env:
          CCACHE_DIR: "${{ github.workspace }}/.cache/ccache"
        run: |
          echo "===== SCRIPT ====="
          cat <<EOF > /tmp/entrypoint.sh
          #!/bin/bash
          source /usr/local/Ascend/ascend-toolkit/set_env.sh
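          # set_env.sh activates the Ascend toolkit environment; without it the
          # GGML_CANN cmake detection cannot locate the CANN libraries (assumption
          # based on the image layout).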
          apt-get update && apt-get install -y build-essential git cmake ccache
          git config --system --add safe.directory '*'
          mkdir -p ${{ github.workspace }}/.cache
          cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \
            -DGGML_CANN=on \
            ${{ matrix.arch == 'amd64' && '-DGGML_NATIVE=off' || '-DGGML_NATIVE=on' }} \
            -DGGML_OPENMP=off
          cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(nproc)
          echo "===== RESULT ====="
          if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then
            ldd ${{ github.workspace }}/build/bin/llama-box
          else
            exit 1
          fi
          EOF
          chmod +x /tmp/entrypoint.sh
          cat /tmp/entrypoint.sh
          echo "===== BUILD ====="
          docker run \
            --rm \
            --privileged \
            --platform linux/${{ matrix.arch }} \
            --volume ${{ github.workspace }}:${{ github.workspace }} \
            --workdir ${{ github.workspace }} \
            --env CCACHE_DIR \
            --volume /tmp/entrypoint.sh:/entrypoint.sh \
            --entrypoint /entrypoint.sh \
            ascendai/cann:${{ matrix.version }}
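          # note: unlike the HIP/CUDA jobs above, the tag is used verbatim; if the
          # registry only publishes 8.0.RC1 (per the matrix comment), this would
          # need the same short-version to full-tag mapping as the other jobs.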
echo "===== PACKAGE =====" | |
mkdir -p ${{ github.workspace }}/out | |
zip -j ${{ github.workspace }}/out/llama-box-linux-${{ matrix.arch }}-cann-${{ matrix.version }}.zip ${{ github.workspace }}/build/bin/* | |
- name: Upload Artifact | |
uses: actions/upload-artifact@v4 | |
with: | |
path: ${{ github.workspace }}/out/*.zip | |
name: llama-box-linux-${{ matrix.arch }}-cann-${{ matrix.version }} | |
windows-hip: | |
continue-on-error: ${{ !startsWith(github.ref, 'refs/tags/') }} | |
strategy: | |
fail-fast: false | |
matrix: | |
# see https://www.amd.com/en/developer/resources/rocm-hub/hip-sdk.html. | |
# 5.7 ==> 5.7.1 | |
# 5.5 ==> 5.5.1 | |
# build fat binary, | |
# see https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878, | |
# https://llvm.org/docs/AMDGPUUsage.html. | |
# official gpu support list, | |
# see https://rocm.docs.amd.com/en/docs-5.7.1/release/windows_support.html, | |
# https://rocm.docs.amd.com/en/docs-5.5.1/release/windows_support.html. | |
arch: [ amd64 ] | |
version: [ '5.7', '5.5' ] | |
size: [ 's', 'l' ] | |
exclude: | |
- size: 'l' | |
version: '5.5' | |
include: | |
- size: 's' | |
hip_arch: 'gfx1030;gfx1100;gfx1101;gfx1102' | |
- size: 'l' | |
hip_arch: 'gfx900;gfx906;gfx908;gfx90a;gfx940;gfx1030;gfx1100;gfx1101;gfx1102' | |
runs-on: windows-2022 | |
steps: | |
- name: Clone | |
uses: actions/checkout@v4 | |
with: | |
fetch-depth: 0 | |
submodules: 'recursive' | |
- name: Setup Cache | |
timeout-minutes: 5 | |
uses: actions/cache@v3 | |
with: | |
key: cache-windows-hip-${{ matrix.arch }}-${{ matrix.version }}-${{ matrix.size }} | |
path: | | |
${{ github.workspace }}\.cache | |
- name: Deps | |
run: | | |
$ErrorActionPreference = "Stop" | |
$ProgressPreference = 'SilentlyContinue' | |
choco install ccache curl -y | |
- name: Setup HIP | |
run: | | |
$ErrorActionPreference = "Stop" | |
$ProgressPreference = 'SilentlyContinue' | |
Write-Host "I install AMD ROCm HIP SDK" | |
curl.exe --retry 5 --retry-delay 5 ` | |
--output "${{ runner.temp }}\installer.exe" ` | |
--url "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-${{ matrix.version == '5.7' && '23.Q4' || '23.Q3' }}-WinSvr2022-For-HIP.exe" | |
Start-Process "${{ runner.temp }}\installer.exe" -NoNewWindow -Wait ` | |
-ArgumentList '-install' | |
Write-Host "I verify AMD ROCm HIP SDK" | |
& 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version | |
$hipPath = "$(Resolve-Path -Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | Split-Path | Split-Path)" | |
"HIP_PATH=${hipPath}" | Out-File -FilePath $env:GITHUB_ENV -Append | |
      - name: Build
        # disable OpenMP,
        # see https://github.com/ggerganov/llama.cpp/issues/7743#issuecomment-2148342691,
        # https://github.com/ggerganov/llama.cpp/issues/7719#issuecomment-2147631216.
        env:
          CCACHE_DIR: "${{ github.workspace }}\\.cache\\ccache"
          AMDGPU_TARGETS: "${{ matrix.hip_arch }}"
        run: |
          Write-Host "===== BUILD ====="
          Write-Host "HIP_PATH=${env:HIP_PATH}"
          New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\.cache" -ErrorAction Ignore | Out-Null
          $env:CMAKE_PREFIX_PATH = "${env:HIP_PATH}"
          cmake -G "Unix Makefiles" -S ${{ github.workspace }} -B ${{ github.workspace }}\build -DCMAKE_BUILD_TYPE=Release `
            -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" `
            -DGGML_HIPBLAS=on -DAMDGPU_TARGETS="${env:AMDGPU_TARGETS}" `
            ${{ matrix.arch == 'amd64' && '-DGGML_NATIVE=off' || '-DGGML_NATIVE=on' }} `
            -DGGML_OPENMP=off
          cmake --build ${{ github.workspace }}\build --target llama-box --config Release -- -j ${env:NUMBER_OF_PROCESSORS}
          Write-Host "===== RESULT ====="
          if (Test-Path -Path "${{ github.workspace }}\build\bin\llama-box.exe") {
            llvm-objdump.exe -p "${{ github.workspace }}\build\bin\llama-box.exe"
          } else {
            exit 1
          }
          Write-Host "===== PACKAGE ====="
          New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\out" -ErrorAction Ignore | Out-Null
          Compress-Archive -Path "${{ github.workspace }}\build\bin\*" -DestinationPath "${{ github.workspace }}\out\llama-box-windows-${{ matrix.arch }}-hip-${{ matrix.version }}-${{ matrix.size }}.zip"
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}\out\*.zip
          name: llama-box-windows-${{ matrix.arch }}-hip-${{ matrix.version }}-${{ matrix.size }}
  windows-cuda:
    continue-on-error: ${{ !startsWith(github.ref, 'refs/tags/') }}
    strategy:
      fail-fast: false
      matrix:
        # see https://developer.nvidia.com/cuda-downloads?target_os=Windows&target_arch=x86_64&target_version=Server2022&target_type=exe_network.
        # 12.5 ==> 12.5.0
        # 11.8 ==> 11.8.0
        # build fat binary,
        # see https://developer.nvidia.com/cuda-gpus.
        arch: [ amd64 ]
        version: [ '12.5', '11.8' ]
        size: [ 's', 'l' ]
        exclude:
          - size: 'l'
            version: '11.8'
        include:
          - size: 's'
            cuda_arch: '80-real;86-real;89'
          - size: 'l'
            version: '12.5'
            cuda_arch: '60-real;61-real;70-real;75-real;80-real;86-real;89'
    # see https://github.com/actions/runner-images?tab=readme-ov-file#available-images,
    # https://forums.developer.nvidia.com/t/problems-with-latest-vs2022-update/294150.
    runs-on: ${{ matrix.version == '12.5' && 'windows-2022' || 'windows-2019' }}
    steps:
      - name: Clone
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'recursive'
      - name: Setup Cache
        # ccache is not supported here,
        # see https://stackoverflow.com/questions/72829476/how-to-use-ccache-4-6-1-on-windows-msvc-with-cmake.
        timeout-minutes: 5
        uses: actions/cache@v3
        with:
          key: cache-windows-cuda-${{ matrix.arch }}-${{ matrix.version }}-${{ matrix.size }}
          path: |
            ${{ github.workspace }}\build
      - name: Setup CUDA
        # ensure MSBuildExtensions has been configured,
        # see https://github.com/NVlabs/tiny-cuda-nn/issues/164#issuecomment-1280749170.
        uses: Jimver/cuda-toolkit@v0.2.16
        with:
          cuda: ${{ matrix.version == '12.5' && '12.5.0' || '11.8.0' }}
          method: 'network'
          sub-packages: '["nvcc", "cudart", "cublas", "cublas_dev", "thrust", "visual_studio_integration"]'
          use-github-cache: false
          use-local-cache: false
      - name: Build
        # disable OpenMP,
        # see https://github.com/ggerganov/llama.cpp/issues/7743#issuecomment-2148342691,
        # https://github.com/ggerganov/llama.cpp/issues/7719#issuecomment-2147631216.
        env:
          CUDA_ARCHITECTURES: "${{ matrix.cuda_arch }}"
        run: |
          $ErrorActionPreference = "Stop"
          $ProgressPreference = 'SilentlyContinue'
          Write-Host "===== BUILD ====="
          Write-Host "CUDA_PATH=${env:CUDA_PATH}"
          cmake -S ${{ github.workspace }} -B ${{ github.workspace }}\build -DCMAKE_BUILD_TYPE=Release `
            -DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES="${env:CUDA_ARCHITECTURES}" `
            ${{ matrix.arch == 'amd64' && '-DGGML_NATIVE=off' || '-DGGML_NATIVE=on' }} `
            -DGGML_OPENMP=off
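          # /m:<N> is MSBuild's parallel-build switch, the Visual Studio
          # generator's analogue of make's -j.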
          cmake --build ${{ github.workspace }}\build --target llama-box --config Release -- /m:${env:NUMBER_OF_PROCESSORS}
          Write-Host "===== RESULT ====="
          if (Test-Path -Path "${{ github.workspace }}\build\bin\Release\llama-box.exe") {
            llvm-objdump.exe -p "${{ github.workspace }}\build\bin\Release\llama-box.exe"
          } else {
            exit 1
          }
          Write-Host "===== PACKAGE ====="
          New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\out" -ErrorAction Ignore | Out-Null
          Compress-Archive -Path "${{ github.workspace }}\build\bin\Release\*" -DestinationPath "${{ github.workspace }}\out\llama-box-windows-${{ matrix.arch }}-cuda-${{ matrix.version }}-${{ matrix.size }}.zip"
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}\out\*.zip
          name: llama-box-windows-${{ matrix.arch }}-cuda-${{ matrix.version }}-${{ matrix.size }}
  windows-oneapi:
    continue-on-error: ${{ !startsWith(github.ref, 'refs/tags/') }}
    strategy:
      fail-fast: false
      matrix:
        # see https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit-download.html?operatingsystem=windows&windows-install-type=offline.
        # 2024.2 ==> 2024.2.0
        # 2024.1 ==> 2024.1.0
        arch: [ amd64 ]
        version: [ '2024.2', '2024.1' ]
    runs-on: windows-2022
    steps:
      - name: Clone
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'recursive'
      - name: Setup Cache
        # ccache is not supported here,
        # as oneAPI needs to configure its environment variables via setvars.bat.
        timeout-minutes: 5
        uses: actions/cache@v3
        with:
          key: cache-windows-oneapi-${{ matrix.arch }}-${{ matrix.version }}
          path: |
            ${{ github.workspace }}\build
      - name: Deps
        run: |
          $ErrorActionPreference = "Stop"
          $ProgressPreference = 'SilentlyContinue'
          choco install curl ninja -y
      - name: Setup oneAPI
        run: |
          $ErrorActionPreference = "Stop"
          $ProgressPreference = 'SilentlyContinue'
          Write-Host "Install Intel oneAPI SDK"
          curl.exe --retry 5 --retry-delay 5 `
            --output "${{ runner.temp }}\installer.exe" `
            --url "https://registrationcenter-download.intel.com/akdlm/IRC_NAS/${{ matrix.version == '2024.2' && 'e83a8e64-04fc-45df-85c6-c2208d03bdb5/w_BaseKit_p_2024.2.0.635' || '7dff44ba-e3af-4448-841c-0d616c8da6e7/w_BaseKit_p_2024.1.0.595' }}.exe"
          Start-Process "${{ runner.temp }}\installer.exe" -NoNewWindow -Wait `
            -ArgumentList '-s','--action=install','--components=intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel','--eula=accept','-p=NEED_VS2017_INTEGRATION=0','-p=NEED_VS2019_INTEGRATION=0','-p=NEED_VS2022_INTEGRATION=0'
          Write-Host "Verify Intel oneAPI SDK"
          & 'C:\Program Files (x86)\Intel\oneAPI\*\bin\icx.exe' --version
          $oneapiPath = "$(Resolve-Path -Path 'C:\Program Files (x86)\Intel\oneAPI\*\bin\icx.exe' | Split-Path | Split-Path)"
          "ONEAPI_PATH=${oneapiPath}" | Out-File -FilePath $env:GITHUB_ENV -Append
          $oneapiRoot = "$(Split-Path -Path $oneapiPath)"
          "ONEAPI_ROOT=${oneapiRoot}" | Out-File -FilePath $env:GITHUB_ENV -Append
      - name: Build
        # disable OpenMP,
        # see https://github.com/ggerganov/llama.cpp/issues/7743#issuecomment-2148342691,
        # https://github.com/ggerganov/llama.cpp/issues/7719#issuecomment-2147631216.
        run: |
          Write-Host "===== BUILD ====="
          Write-Host "ONEAPI_PATH=${env:ONEAPI_PATH}"
          Write-Host "ONEAPI_ROOT=${env:ONEAPI_ROOT}"
& "${{ github.workspace }}\llama-box\scripts\build-windows-oneapi.bat" "${{ github.workspace }}" "${{ matrix.arch }}" | |
Write-Host "===== RESULT =====" | |
if (Test-Path -Path "${{ github.workspace }}\build\bin\llama-box.exe") { | |
llvm-objdump.exe -p "${{ github.workspace }}\build\bin\llama-box.exe" | |
} else { | |
exit 1 | |
} | |
Write-Host "===== PACKAGE =====" | |
New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\out" -ErrorAction Ignore | Out-Null | |
Compress-Archive -Path "${{ github.workspace }}\build\bin\*" -DestinationPath "${{ github.workspace }}\out\llama-box-windows-${{ matrix.arch }}-oneapi-${{ matrix.version }}.zip" | |
- name: Upload Artifact | |
uses: actions/upload-artifact@v4 | |
with: | |
path: ${{ github.workspace }}\\out\\*.zip | |
name: llama-box-windows-${{ matrix.arch }}-oneapi-${{ matrix.version }} | |
release: | |
if: ${{ startsWith(github.ref, 'refs/tags/') }} | |
permissions: | |
contents: write | |
actions: read | |
id-token: write | |
runs-on: ubuntu-22.04 | |
needs: | |
- darwin-metal | |
- linux-hip | |
- linux-cuda | |
- linux-oneapi | |
- linux-cann | |
- windows-hip | |
- windows-cuda | |
- windows-oneapi | |
steps: | |
- name: Download Artifact | |
uses: actions/download-artifact@v4 | |
with: | |
path: ${{ github.workspace }}/out | |
merge-multiple: true | |
- name: Release | |
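        # publish every downloaded artifact; tags containing 'rc'
        # (e.g. v0.0.1-rc1) are marked as prereleases.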
        uses: softprops/action-gh-release@v1
        with:
          fail_on_unmatched_files: true
          tag_name: "${{ env.VERSION }}"
          prerelease: ${{ contains(github.ref, 'rc') }}
          files: ${{ github.workspace }}/out/*