ci: reduce cuda binary size #79
name: ci
permissions:
  contents: read
  pull-requests: read
  actions: read
env:
  VERSION: "${{ github.ref_name }}"
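# for tag pushes, github.ref_name is the tag itself (e.g. v1.2.3); the release job below reuses it as tag_name.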
on:
  workflow_dispatch: { }
  push:
    tags:
      - "v*.*.*"
    branches:
      - main
    paths-ignore:
      - "docs/**"
      - "**.md"
      - "**.mdx"
      - "**.png"
      - "**.jpg"
  pull_request:
    branches:
      - main
    paths-ignore:
      - "docs/**"
      - "**.md"
      - "**.mdx"
      - "**.png"
      - "**.jpg"
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true
jobs:
  # cache-able building with ccache.
  darwin-metal:
    strategy:
      fail-fast: false
      matrix:
        arch: [ amd64, arm64 ]
        version: [ '3.0' ]
    # see https://github.com/actions/runner-images?tab=readme-ov-file#available-images,
    # https://support.apple.com/en-us/102894.
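    # macos-13 runners are Intel machines, macos-14 runners are Apple Silicon.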
    runs-on: ${{ matrix.arch == 'amd64' && 'macos-13' || 'macos-14' }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'recursive'
      - name: Deps
        run: |
          brew update && brew install ccache
      - name: Setup Xcode
        uses: maxim-lobanov/setup-xcode@v1
        with:
          xcode-version: '15.2'
      - name: Setup Cache
        timeout-minutes: 5
        uses: actions/cache@v3
        with:
          key: cache-darwin-metal-${{ matrix.arch }}-${{ matrix.version }}
          path: |
            ${{ github.workspace }}/.cache
      - name: Build
        # disable OpenMP,
        # see https://github.com/ggerganov/llama.cpp/issues/7743#issuecomment-2148342691,
        # https://github.com/ggerganov/llama.cpp/issues/7719#issuecomment-2147631216.
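        # GGML_METAL_EMBED_LIBRARY embeds the Metal shader library in the binary itself,
        # so the package below only needs to ship the single llama-box executable.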
        env:
          CCACHE_DIR: "${{ github.workspace }}/.cache/ccache"
        run: |
          echo "===== BUILD ====="
          mkdir -p ${{ github.workspace }}/.cache
          cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \
            -DGGML_ACCELERATE=on -DGGML_METAL=on -DGGML_METAL_EMBED_LIBRARY=on \
            ${{ matrix.arch == 'amd64' && '-DGGML_NATIVE=off' || '-DGGML_NATIVE=on' }} \
            -DGGML_OPENMP=off
          cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(nproc)
          echo "===== RESULT ====="
          if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then
            otool -L ${{ github.workspace }}/build/bin/llama-box
          else
            exit 1
          fi
          echo "===== PACKAGE ====="
          mkdir -p ${{ github.workspace }}/out
          zip -j ${{ github.workspace }}/out/llama-box-darwin-${{ matrix.arch }}-metal.zip ${{ github.workspace }}/build/bin/llama-box
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}/out/*.zip
          name: llama-box-darwin-${{ matrix.arch }}-metal-${{ matrix.version }}
  # cache-able building with ccache.
  linux-hip:
    strategy:
      fail-fast: false
      matrix:
        arch: [ amd64 ]
        # see https://hub.docker.com/r/rocm/dev-ubuntu-22.04/tags.
        # 6.1 ==> 6.1.2
        # 5.7 ==> 5.7.1
        version: [ '6.1', '5.7' ]
    runs-on: ubuntu-22.04
    steps:
      - name: Maximize Space
        # see https://github.com/easimon/maximize-build-space/blob/master/action.yml.
        run: |
          sudo rm -rf /usr/share/dotnet
          sudo rm -rf /usr/local/lib/android
          sudo rm -rf /opt/ghc
          sudo rm -rf /opt/hostedtoolcache/CodeQL
          sudo docker image prune --all --force
      - name: Clone
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'recursive'
      - name: Setup Cache
        timeout-minutes: 5
        uses: actions/cache@v3
        with:
          key: cache-linux-hip-${{ matrix.arch }}-${{ matrix.version }}
          path: |
            ${{ github.workspace }}/.cache
      - name: Setup QEMU
        if: ${{ matrix.arch == 'arm64' }}
        uses: docker/setup-qemu-action@v3
        with:
          image: tonistiigi/binfmt:qemu-v7.0.0
          platforms: "arm64"
      - name: Build
        # disable OpenMP,
        # see https://github.com/ggerganov/llama.cpp/issues/7743#issuecomment-2148342691,
        # https://github.com/ggerganov/llama.cpp/issues/7719#issuecomment-2147631216.
        # build fat binary,
        # see https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878,
        # https://llvm.org/docs/AMDGPUUsage.html.
        # official gpu support list,
        # see https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.1.2/reference/system-requirements.html,
        # https://rocm.docs.amd.com/en/docs-5.7.1/release/gpu_os_support.html.
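        # NB: the entrypoint heredoc below is unquoted, so ${AMDGPU_TARGETS} and $(nproc) are
        # expanded on the runner while the script is written, not inside the container;
        # CCACHE_DIR is still forwarded via --env because ccache reads it at compile time.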
        env:
          CCACHE_DIR: "${{ github.workspace }}/.cache/ccache"
          AMDGPU_TARGETS: "gfx803;gfx900;gfx906;gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1010;gfx1030;gfx1100;gfx1101;gfx1102"
        run: |
          echo "===== SCRIPT ====="
          cat <<EOF > /tmp/entrypoint.sh
          #!/bin/bash
          apt-get update && apt-get install -y build-essential git cmake ccache
          git config --system --add safe.directory '*'
          mkdir -p ${{ github.workspace }}/.cache
          cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \
            -DGGML_HIPBLAS=on -DAMDGPU_TARGETS="${AMDGPU_TARGETS}" \
            ${{ matrix.arch == 'amd64' && '-DGGML_NATIVE=off' || '-DGGML_NATIVE=on' }} \
            -DGGML_OPENMP=off
          cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(nproc)
          EOF
          chmod +x /tmp/entrypoint.sh
          cat /tmp/entrypoint.sh
          echo "===== BUILD ====="
          docker run \
            --rm \
            --privileged \
            --platform linux/${{ matrix.arch }} \
            --volume ${{ github.workspace }}:${{ github.workspace }} \
            --workdir ${{ github.workspace }} \
            --env CC=/opt/rocm/llvm/bin/clang \
            --env CXX=/opt/rocm/llvm/bin/clang++ \
            --env CCACHE_DIR \
            --env AMDGPU_TARGETS \
            --volume /tmp/entrypoint.sh:/entrypoint.sh \
            --entrypoint /entrypoint.sh \
            rocm/dev-ubuntu-22.04:${{ matrix.version == '6.1' && '6.1.2' || '5.7.1' }}-complete
          echo "===== RESULT ====="
          if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then
            ldd ${{ github.workspace }}/build/bin/llama-box
          else
            exit 1
          fi
          echo "===== PACKAGE ====="
          mkdir -p ${{ github.workspace }}/out
          zip -j ${{ github.workspace }}/out/llama-box-linux-${{ matrix.arch }}-hip-${{ matrix.version }}.zip ${{ github.workspace }}/build/bin/*
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}/out/*.zip
          name: llama-box-linux-${{ matrix.arch }}-hip-${{ matrix.version }}
  # cache-able building with ccache.
  linux-cuda:
    strategy:
      fail-fast: false
      matrix:
        arch: [ amd64 ]
        # see https://hub.docker.com/r/nvidia/cuda/tags?page=&page_size=&ordering=&name=devel.
        # 12.5 ==> 12.5.0
        # 11.7 ==> 11.7.1
        version: [ '12.5', '11.7' ]
    runs-on: ubuntu-22.04
    steps:
      - name: Maximize Space
        # see https://github.com/easimon/maximize-build-space/blob/master/action.yml.
        run: |
          sudo rm -rf /usr/share/dotnet
          sudo rm -rf /usr/local/lib/android
          sudo rm -rf /opt/ghc
          sudo rm -rf /opt/hostedtoolcache/CodeQL
          sudo docker image prune --all --force
      - name: Clone
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'recursive'
      - name: Setup Cache
        timeout-minutes: 5
        uses: actions/cache@v3
        with:
          key: cache-linux-cuda-${{ matrix.arch }}-${{ matrix.version }}
          path: |
            ${{ github.workspace }}/.cache
      - name: Setup QEMU
        if: ${{ matrix.arch == 'arm64' }}
        uses: docker/setup-qemu-action@v3
        with:
          image: tonistiigi/binfmt:qemu-v7.0.0
          platforms: "arm64"
      - name: Build
        # disable OpenMP,
        # see https://github.com/ggerganov/llama.cpp/issues/7743#issuecomment-2148342691,
        # https://github.com/ggerganov/llama.cpp/issues/7719#issuecomment-2147631216.
        # build fat binary,
        # see https://developer.nvidia.com/cuda-gpus.
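        # the explicit CMAKE_CUDA_ARCHITECTURES list is what keeps the CUDA binary size down:
        # each listed architecture embeds its own copy of the device code in the fat binary,
        # so every extra entry grows the executable.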
        env:
          CCACHE_DIR: "${{ github.workspace }}/.cache/ccache"
          CUDA_ARCHITECTURES: "52;61;70;75;80"
        run: |
          echo "===== SCRIPT ====="
          cat <<EOF > /tmp/entrypoint.sh
          #!/bin/bash
          apt-get update && apt-get install -y build-essential git cmake ccache
          git config --system --add safe.directory '*'
          mkdir -p ${{ github.workspace }}/.cache
          cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \
            -DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES="${CUDA_ARCHITECTURES}" \
            ${{ matrix.arch == 'amd64' && '-DGGML_NATIVE=off' || '-DGGML_NATIVE=on' }} \
            -DGGML_OPENMP=off
          cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(nproc)
          EOF
          chmod +x /tmp/entrypoint.sh
          cat /tmp/entrypoint.sh
          echo "===== BUILD ====="
          docker run \
            --rm \
            --privileged \
            --platform linux/${{ matrix.arch }} \
            --volume ${{ github.workspace }}:${{ github.workspace }} \
            --workdir ${{ github.workspace }} \
            --env CCACHE_DIR \
            --env CUDA_ARCHITECTURES \
            --volume /tmp/entrypoint.sh:/entrypoint.sh \
            --entrypoint /entrypoint.sh \
            nvidia/cuda:${{ matrix.version == '12.5' && '12.5.0' || '11.7.1' }}-devel-ubuntu22.04
          echo "===== RESULT ====="
          if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then
            ldd ${{ github.workspace }}/build/bin/llama-box
          else
            exit 1
          fi
          echo "===== PACKAGE ====="
          mkdir -p ${{ github.workspace }}/out
          zip -j ${{ github.workspace }}/out/llama-box-linux-${{ matrix.arch }}-cuda-${{ matrix.version }}.zip ${{ github.workspace }}/build/bin/*
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}/out/*.zip
          name: llama-box-linux-${{ matrix.arch }}-cuda-${{ matrix.version }}
  # cache-able building with ccache.
  linux-oneapi:
    strategy:
      fail-fast: false
      matrix:
        arch: [ amd64 ]
        # see https://hub.docker.com/r/intel/oneapi-basekit/tags?page=&page_size=&ordering=&name=devel.
        # 2024.2 ==> 2024.2.0
        # 2024.1 ==> 2024.1.1
        version: [ '2024.2', '2024.1' ]
    runs-on: ubuntu-22.04
    steps:
      - name: Maximize Space
        # see https://github.com/easimon/maximize-build-space/blob/master/action.yml.
        run: |
          sudo rm -rf /usr/share/dotnet
          sudo rm -rf /usr/local/lib/android
          sudo rm -rf /opt/ghc
          sudo rm -rf /opt/hostedtoolcache/CodeQL
          sudo docker image prune --all --force
      - name: Clone
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'recursive'
      - name: Setup Cache
        timeout-minutes: 5
        uses: actions/cache@v3
        with:
          key: cache-linux-oneapi-${{ matrix.arch }}-${{ matrix.version }}
          path: |
            ${{ github.workspace }}/.cache
      - name: Setup QEMU
        if: ${{ matrix.arch == 'arm64' }}
        uses: docker/setup-qemu-action@v3
        with:
          image: tonistiigi/binfmt:qemu-v7.0.0
          platforms: "arm64"
      - name: Build
        # disable OpenMP,
        # see https://github.com/ggerganov/llama.cpp/issues/7743#issuecomment-2148342691,
        # https://github.com/ggerganov/llama.cpp/issues/7719#issuecomment-2147631216.
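        # GGML_SYCL requires the oneAPI DPC++ compilers, hence CC=icx/CXX=icpx is passed
        # into the container below instead of the default gcc toolchain.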
        env:
          CCACHE_DIR: "${{ github.workspace }}/.cache/ccache"
        run: |
          echo "===== SCRIPT ====="
          cat <<EOF > /tmp/entrypoint.sh
          #!/bin/bash
          apt-get update && apt-get install -y build-essential git cmake ccache
          git config --system --add safe.directory '*'
          mkdir -p ${{ github.workspace }}/.cache
          cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \
            -DGGML_SYCL=on -DGGML_SYCL_F16=on \
            ${{ matrix.arch == 'amd64' && '-DGGML_NATIVE=off' || '-DGGML_NATIVE=on' }} \
            -DGGML_OPENMP=off
          cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(nproc)
          EOF
          chmod +x /tmp/entrypoint.sh
          cat /tmp/entrypoint.sh
          echo "===== BUILD ====="
          docker run \
            --rm \
            --privileged \
            --platform linux/${{ matrix.arch }} \
            --volume ${{ github.workspace }}:${{ github.workspace }} \
            --workdir ${{ github.workspace }} \
            --env CC=icx \
            --env CXX=icpx \
            --env CCACHE_DIR \
            --volume /tmp/entrypoint.sh:/entrypoint.sh \
            --entrypoint /entrypoint.sh \
            intel/oneapi-basekit:${{ matrix.version == '2024.2' && '2024.2.0' || '2024.1.1' }}-devel-ubuntu22.04
          echo "===== RESULT ====="
          if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then
            ldd ${{ github.workspace }}/build/bin/llama-box
          else
            exit 1
          fi
          echo "===== PACKAGE ====="
          mkdir -p ${{ github.workspace }}/out
          zip -j ${{ github.workspace }}/out/llama-box-linux-${{ matrix.arch }}-oneapi-${{ matrix.version }}.zip ${{ github.workspace }}/build/bin/*
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}/out/*.zip
          name: llama-box-linux-${{ matrix.arch }}-oneapi-${{ matrix.version }}
  # cache-able building with ccache.
  windows-hip:
    strategy:
      fail-fast: false
      matrix:
        arch: [ amd64 ]
        # see https://www.amd.com/en/developer/resources/rocm-hub/hip-sdk.html.
        # 5.7 ==> 5.7.1
        # 5.5 ==> 5.5.1
        version: [ '5.7', '5.5' ]
    runs-on: windows-2022
    steps:
      - name: Clone
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'recursive'
      - name: Deps
        run: |
          $ErrorActionPreference = "Stop"
          $ProgressPreference = 'SilentlyContinue'
          choco install ccache curl -y
      - name: Setup Cache
        timeout-minutes: 5
        uses: actions/cache@v3
        with:
          key: cache-windows-hip-${{ matrix.arch }}-${{ matrix.version }}
          path: |
            ${{ github.workspace }}\.cache
      - name: Setup HIP
        run: |
          $ErrorActionPreference = "Stop"
          $ProgressPreference = 'SilentlyContinue'
          Write-Host "Installing AMD ROCm HIP SDK"
          curl.exe --retry 5 --retry-delay 5 `
            --output "${{ runner.temp }}\installer.exe" `
            --url "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-${{ matrix.version == '5.7' && '23.Q4' || '23.Q3' }}-WinSvr2022-For-HIP.exe"
          Start-Process "${{ runner.temp }}\installer.exe" -NoNewWindow -Wait `
            -ArgumentList '-install'
          Write-Host "Verifying AMD ROCm HIP SDK"
          & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
          $hipPath = "$(Resolve-Path -Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | Split-Path | Split-Path)"
          "HIP_PATH=${hipPath}" | Out-File -FilePath $env:GITHUB_ENV -Append
      - name: Build
        # disable OpenMP,
        # see https://github.com/ggerganov/llama.cpp/issues/7743#issuecomment-2148342691,
        # https://github.com/ggerganov/llama.cpp/issues/7719#issuecomment-2147631216.
        # build fat binary,
        # see https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878,
        # https://llvm.org/docs/AMDGPUUsage.html.
        # official gpu support list,
        # see https://rocm.docs.amd.com/en/docs-5.7.1/release/windows_support.html,
        # https://rocm.docs.amd.com/en/docs-5.5.1/release/windows_support.html.
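        # HIP on Windows is compiled with ROCm's bundled clang/clang++, driven through the
        # "Unix Makefiles" generator rather than the default Visual Studio toolchain.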
        env:
          CCACHE_DIR: "${{ github.workspace }}\\.cache\\ccache"
          AMDGPU_TARGETS: "${{ matrix.version == '5.7' && 'gfx803;gfx900;gfx906;gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1010;gfx1030;gfx1100;gfx1101;gfx1102' || 'gfx803;gfx900;gfx906;gfx908;gfx90a;gfx940;gfx1010;gfx1030;gfx1100;gfx1101;gfx1102' }}"
        run: |
          Write-Host "===== BUILD ====="
          Write-Host "HIP_PATH=${env:HIP_PATH}"
          New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\.cache" -ErrorAction Ignore | Out-Null
          $env:CMAKE_PREFIX_PATH = "${env:HIP_PATH}"
          cmake -G "Unix Makefiles" -S ${{ github.workspace }} -B ${{ github.workspace }}\build -DCMAKE_BUILD_TYPE=Release `
            -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" `
            -DGGML_HIPBLAS=on -DAMDGPU_TARGETS="${env:AMDGPU_TARGETS}" `
            ${{ matrix.arch == 'amd64' && '-DGGML_NATIVE=off' || '-DGGML_NATIVE=on' }} `
            -DGGML_OPENMP=off
          cmake --build ${{ github.workspace }}\build --target llama-box --config Release -- -j ${env:NUMBER_OF_PROCESSORS}
          Write-Host "===== RESULT ====="
          if (Test-Path -Path "${{ github.workspace }}\build\bin\llama-box.exe") {
            llvm-objdump.exe -p "${{ github.workspace }}\build\bin\llama-box.exe"
          } else {
            exit 1
          }
          Write-Host "===== PACKAGE ====="
          New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\out" -ErrorAction Ignore | Out-Null
          Compress-Archive -Path "${{ github.workspace }}\build\bin\*" -DestinationPath "${{ github.workspace }}\out\llama-box-windows-${{ matrix.arch }}-hip-${{ matrix.version }}.zip"
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}\out\*.zip
          name: llama-box-windows-${{ matrix.arch }}-hip-${{ matrix.version }}
  # uncache-able building,
  # see https://stackoverflow.com/questions/72829476/how-to-use-ccache-4-6-1-on-windows-msvc-with-cmake.
  windows-cuda:
    strategy:
      fail-fast: false
      matrix:
        arch: [ amd64 ]
        # see https://developer.nvidia.com/cuda-downloads?target_os=Windows&target_arch=x86_64&target_version=Server2022&target_type=exe_network.
        # 12.5 ==> 12.5.0
        # 11.7 ==> 11.7.1
        version: [ '12.5', '11.7' ]
    # see https://github.com/actions/runner-images?tab=readme-ov-file#available-images,
    # https://forums.developer.nvidia.com/t/problems-with-latest-vs2022-update/294150.
    runs-on: ${{ matrix.version == '12.5' && 'windows-2022' || 'windows-2019' }}
    steps:
      - name: Clone
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'recursive'
      - name: Setup CUDA
        # ensure MSBuildExtensions has been configured,
        # see https://github.com/NVlabs/tiny-cuda-nn/issues/164#issuecomment-1280749170.
        uses: Jimver/cuda-toolkit@v0.2.16
        with:
          cuda: ${{ matrix.version == '12.5' && '12.5.0' || '11.7.1' }}
          method: 'network'
          sub-packages: '["nvcc", "cudart", "cublas", "cublas_dev", "thrust", "visual_studio_integration"]'
          use-github-cache: false
          use-local-cache: false
      - name: Build
        # disable OpenMP,
        # see https://github.com/ggerganov/llama.cpp/issues/7743#issuecomment-2148342691,
        # https://github.com/ggerganov/llama.cpp/issues/7719#issuecomment-2147631216.
        # build fat binary,
        # see https://developer.nvidia.com/cuda-gpus.
        env:
          CUDA_ARCHITECTURES: "52;61;70;75;80"
        run: |
          $ErrorActionPreference = "Stop"
          $ProgressPreference = 'SilentlyContinue'
          Write-Host "===== BUILD ====="
          Write-Host "CUDA_PATH=${env:CUDA_PATH}"
          cmake -S ${{ github.workspace }} -B ${{ github.workspace }}\build -DCMAKE_BUILD_TYPE=Release `
            -DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES="${env:CUDA_ARCHITECTURES}" `
            ${{ matrix.arch == 'amd64' && '-DGGML_NATIVE=off' || '-DGGML_NATIVE=on' }} `
            -DGGML_OPENMP=off
          cmake --build ${{ github.workspace }}\build --target llama-box --config Release -- /m:${env:NUMBER_OF_PROCESSORS}
          Write-Host "===== RESULT ====="
          if (Test-Path -Path "${{ github.workspace }}\build\bin\Release\llama-box.exe") {
            llvm-objdump.exe -p "${{ github.workspace }}\build\bin\Release\llama-box.exe"
          } else {
            exit 1
          }
          Write-Host "===== PACKAGE ====="
          New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\out" -ErrorAction Ignore | Out-Null
          Compress-Archive -Path "${{ github.workspace }}\build\bin\Release\*" -DestinationPath "${{ github.workspace }}\out\llama-box-windows-${{ matrix.arch }}-cuda-${{ matrix.version }}.zip"
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}\out\*.zip
          name: llama-box-windows-${{ matrix.arch }}-cuda-${{ matrix.version }}
  # uncache-able building,
  # as oneAPI needs to configure its environment variables via setvars.bat.
  windows-oneapi:
    strategy:
      fail-fast: false
      matrix:
        arch: [ amd64 ]
        # see https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit-download.html?operatingsystem=windows&windows-install-type=offline.
        # 2024.2 ==> 2024.2.0
        # 2024.1 ==> 2024.1.0
        version: [ '2024.2', '2024.1' ]
    runs-on: windows-2022
    steps:
      - name: Clone
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'recursive'
      - name: Deps
        run: |
          $ErrorActionPreference = "Stop"
          $ProgressPreference = 'SilentlyContinue'
          choco install curl ninja -y
      - name: Setup oneAPI
        run: |
          $ErrorActionPreference = "Stop"
          $ProgressPreference = 'SilentlyContinue'
          Write-Host "Installing Intel oneAPI SDK"
          curl.exe --retry 5 --retry-delay 5 `
            --output "${{ runner.temp }}\installer.exe" `
            --url "https://registrationcenter-download.intel.com/akdlm/IRC_NAS/${{ matrix.version == '2024.2' && 'e83a8e64-04fc-45df-85c6-c2208d03bdb5/w_BaseKit_p_2024.2.0.635' || '7dff44ba-e3af-4448-841c-0d616c8da6e7/w_BaseKit_p_2024.1.0.595' }}.exe"
          Start-Process "${{ runner.temp }}\installer.exe" -NoNewWindow -Wait `
            -ArgumentList '-s','--action=install','--components=intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel','--eula=accept','-p=NEED_VS2017_INTEGRATION=0','-p=NEED_VS2019_INTEGRATION=0','-p=NEED_VS2022_INTEGRATION=0'
          Write-Host "Verifying Intel oneAPI SDK"
          & 'C:\Program Files (x86)\Intel\oneAPI\*\bin\icx.exe' --version
          $oneapiPath = "$(Resolve-Path -Path 'C:\Program Files (x86)\Intel\oneAPI\*\bin\icx.exe' | Split-Path | Split-Path)"
          "ONEAPI_PATH=${oneapiPath}" | Out-File -FilePath $env:GITHUB_ENV -Append
          $oneapiRoot = "$(Split-Path -Path $oneapiPath)"
          "ONEAPI_ROOT=${oneapiRoot}" | Out-File -FilePath $env:GITHUB_ENV -Append
      - name: Build
        # disable OpenMP,
        # see https://github.com/ggerganov/llama.cpp/issues/7743#issuecomment-2148342691,
        # https://github.com/ggerganov/llama.cpp/issues/7719#issuecomment-2147631216.
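        # build-windows-oneapi.bat is expected to source setvars.bat before invoking cmake
        # (see the job comment above), which is also why this job skips the ccache cache.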
        run: |
          Write-Host "===== BUILD ====="
          Write-Host "ONEAPI_PATH=${env:ONEAPI_PATH}"
          Write-Host "ONEAPI_ROOT=${env:ONEAPI_ROOT}"
          & "${{ github.workspace }}\llama-box\scripts\build-windows-oneapi.bat" "${{ github.workspace }}" "${{ matrix.arch }}"
          Write-Host "===== RESULT ====="
          if (Test-Path -Path "${{ github.workspace }}\build\bin\llama-box.exe") {
            llvm-objdump.exe -p "${{ github.workspace }}\build\bin\llama-box.exe"
          } else {
            exit 1
          }
          Write-Host "===== PACKAGE ====="
          New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\out" -ErrorAction Ignore | Out-Null
          Compress-Archive -Path "${{ github.workspace }}\build\bin\*" -DestinationPath "${{ github.workspace }}\out\llama-box-windows-${{ matrix.arch }}-oneapi-${{ matrix.version }}.zip"
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}\out\*.zip
          name: llama-box-windows-${{ matrix.arch }}-oneapi-${{ matrix.version }}
  release:
    if: ${{ startsWith(github.ref, 'refs/tags/') }}
    permissions:
      contents: write
      actions: read
      id-token: write
    runs-on: ubuntu-22.04
    needs:
      - darwin-metal
      - linux-hip
      - linux-cuda
      - linux-oneapi
      - windows-hip
      - windows-cuda
      - windows-oneapi
    steps:
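      # download-artifact@v4 creates one directory per artifact name by default;
      # merge-multiple flattens all of the per-job zips into a single out/ directory.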
      - name: Download Artifact
        uses: actions/download-artifact@v4
        with:
          path: ${{ github.workspace }}/out
          merge-multiple: true
      - name: Release
        uses: softprops/action-gh-release@v1
        with:
          fail_on_unmatched_files: true
          tag_name: "${{ env.VERSION }}"
          prerelease: ${{ contains(github.ref, 'rc') }}
          files: ${{ github.workspace }}/out/*