fix: embedding crashing #601
name: ci | |
permissions: | |
contents: read | |
pull-requests: read | |
actions: read | |
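# NB: this version string is forwarded into each containerized build below via "docker run --env LLAMA_BOX_BUILD_VERSION"; it is presumably consumed by the build scripts to stamp the binary. | |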
env: | |
LLAMA_BOX_BUILD_VERSION: "${{ github.ref_name }}" | |
on: | |
workflow_dispatch: { } | |
push: | |
tags: | |
- "v*.*.*" | |
branches: | |
- "main" | |
- "branch-v*.*" | |
paths-ignore: | |
- "docs/**" | |
- "**.md" | |
- "**.mdx" | |
- "**.png" | |
- "**.jpg" | |
- "!.github/workflows/ci.yml" | |
pull_request: | |
branches: | |
- "main" | |
paths-ignore: | |
- "docs/**" | |
- "**.md" | |
- "**.mdx" | |
- "**.png" | |
- "**.jpg" | |
- "!.github/workflows/ci.yml" | |
concurrency: | |
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} | |
cancel-in-progress: true | |
# Disable OpenMP, | |
# see https://github.com/ggerganov/llama.cpp/issues/7743#issuecomment-2148342691, | |
# https://github.com/ggerganov/llama.cpp/issues/7719#issuecomment-2147631216. | |
jobs: | |
darwin: | |
strategy: | |
fail-fast: false | |
matrix: | |
include: | |
- arch: 'amd64' | |
instruction: 'avx2' | |
runs-on: ${{ matrix.arch == 'amd64' && 'macos-13' || 'macos-14' }} | |
steps: | |
- name: Checkout | |
uses: actions/checkout@v4 | |
with: | |
fetch-depth: 0 | |
submodules: 'recursive' | |
- name: Setup Cache | |
timeout-minutes: 5 | |
uses: actions/cache@v4 | |
with: | |
key: cache-darwin-${{ matrix.arch }}-${{ matrix.instruction }} | |
path: | | |
${{ github.workspace }}/.cache | |
- name: Deps | |
run: | | |
brew update && brew install ccache | |
- name: Setup XCode | |
uses: maxim-lobanov/setup-xcode@v1 | |
with: | |
xcode-version: '15.2' | |
- name: Build | |
env: | |
CCACHE_DIR: "${{ github.workspace }}/.cache/ccache" | |
run: | | |
echo "===== BUILD =====" | |
mkdir -p ${{ github.workspace }}/.cache | |
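# CPU-only macOS build: Metal is disabled and the Accelerate framework supplies BLAS. | |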
cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \ | |
-DGGML_ACCELERATE=on -DGGML_METAL=off \ | |
-DGGML_NATIVE=on \ | |
-DGGML_OPENMP=off \ | |
-DGGML_RPC=on | |
cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(nproc) | |
echo "===== RESULT =====" | |
ls -alh ${{ github.workspace }}/build/bin/ | |
if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then | |
otool --version | |
otool -L ${{ github.workspace }}/build/bin/llama-box || true | |
else | |
exit 1 | |
fi | |
echo "===== PACKAGE =====" | |
mkdir -p ${{ github.workspace }}/out | |
zip -j ${{ github.workspace }}/out/llama-box-darwin-${{ matrix.arch }}-${{ matrix.instruction }}.zip ${{ github.workspace }}/build/bin/llama-box | |
- name: Upload Artifact | |
uses: actions/upload-artifact@v4 | |
with: | |
path: ${{ github.workspace }}/out/*.zip | |
name: llama-box-darwin-${{ matrix.arch }}-${{ matrix.instruction }} | |
darwin-metal: | |
strategy: | |
fail-fast: false | |
matrix: | |
arch: | |
- 'amd64' | |
- 'arm64' | |
version: | |
- '3.0' | |
# see https://github.com/actions/runner-images?tab=readme-ov-file#available-images, | |
# https://support.apple.com/en-us/102894. | |
runs-on: ${{ matrix.arch == 'amd64' && 'macos-13' || 'macos-14' }} | |
steps: | |
- name: Checkout | |
uses: actions/checkout@v4 | |
with: | |
fetch-depth: 0 | |
submodules: 'recursive' | |
- name: Setup Cache | |
timeout-minutes: 5 | |
uses: actions/cache@v4 | |
with: | |
key: cache-darwin-metal-${{ matrix.arch }}-${{ matrix.version }} | |
path: | | |
${{ github.workspace }}/.cache | |
- name: Deps | |
run: | | |
brew update && brew install ccache | |
- name: Setup XCode | |
uses: maxim-lobanov/setup-xcode@v1 | |
with: | |
xcode-version: '15.2' | |
- name: Build | |
env: | |
CCACHE_DIR: "${{ github.workspace }}/.cache/ccache" | |
run: | | |
echo "===== BUILD =====" | |
mkdir -p ${{ github.workspace }}/.cache | |
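# GGML_METAL_EMBED_LIBRARY bakes the Metal shader library into the binary, so the packaged llama-box does not need a separate .metal file next to it. | |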
cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \ | |
-DGGML_ACCELERATE=on -DGGML_METAL=on -DGGML_METAL_USE_BF16=on -DGGML_METAL_EMBED_LIBRARY=on \ | |
-DGGML_NATIVE=on \ | |
-DGGML_OPENMP=off \ | |
-DGGML_RPC=on | |
cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(nproc) | |
echo "===== RESULT =====" | |
ls -alh ${{ github.workspace }}/build/bin/ | |
if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then | |
otool --version | |
otool -L ${{ github.workspace }}/build/bin/llama-box || true | |
else | |
exit 1 | |
fi | |
echo "===== PACKAGE =====" | |
mkdir -p ${{ github.workspace }}/out | |
zip -j ${{ github.workspace }}/out/llama-box-darwin-${{ matrix.arch }}-metal.zip ${{ github.workspace }}/build/bin/llama-box | |
- name: Upload Artifact | |
uses: actions/upload-artifact@v4 | |
with: | |
path: ${{ github.workspace }}/out/*.zip | |
name: llama-box-darwin-${{ matrix.arch }}-metal-${{ matrix.version }} | |
linux: | |
strategy: | |
fail-fast: false | |
matrix: | |
# AVX2 ==> CentOS 7. | |
# AVX512 ==> RockyLinux 8.9. | |
# NEON ==> Ubuntu 18.04. | |
include: | |
- arch: 'amd64' | |
instruction: 'avx2' | |
distro_container_image: 'centos:7' | |
- arch: 'amd64' | |
instruction: 'avx512' | |
distro_container_image: 'rockylinux:8.9' | |
- arch: 'arm64' | |
instruction: 'neon' | |
distro_container_image: 'ubuntu:18.04' | |
runs-on: ubuntu-22.04 | |
steps: | |
- name: Maximize Docker Build Space | |
uses: gpustack/.github/.github/actions/maximize-docker-build-space@main | |
with: | |
deep-clean: false | |
root-reserve-mb: 20480 | |
- name: Checkout | |
uses: actions/checkout@v4 | |
with: | |
fetch-depth: 0 | |
submodules: 'recursive' | |
- name: Setup Cache | |
timeout-minutes: 5 | |
uses: actions/cache@v4 | |
with: | |
key: cache-linux-${{ matrix.arch }}-${{ matrix.instruction }} | |
path: | | |
${{ github.workspace }}/.cache | |
- name: Setup QEMU | |
if: ${{ matrix.arch == 'arm64' }} | |
uses: docker/setup-qemu-action@v3 | |
with: | |
image: tonistiigi/binfmt:qemu-v8.1.5 | |
platforms: "arm64" | |
- name: Build | |
env: | |
CMAKE_VERSION: "3.22.1" | |
CCACHE_DIR: "${{ github.workspace }}/.cache/ccache" | |
run: | | |
echo "===== SCRIPT =====" | |
cat <<EOF > /tmp/entrypoint.sh | |
#!/bin/bash | |
if [ -f /etc/os-release ]; then | |
source /etc/os-release | |
cat /etc/os-release | |
if [ "\${ID}" = "ubuntu" ]; then | |
apt-get update -y \ | |
&& apt-get install -y binutils pkg-config build-essential libopenblas-dev ccache curl git bc | |
if (( \$(echo "\${VERSION_ID} < 21.04" | bc -l) )); then | |
apt-get update -y \ | |
&& apt-get install -y software-properties-common | |
add-apt-repository -y ppa:ubuntu-toolchain-r/test | |
apt-get update -y \ | |
&& apt-get install -y gcc-11 g++-11 | |
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 10 | |
update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 10 | |
fi | |
elif [ "\${ID}" = "rocky" ]; then | |
# NB(thxCode): Enable EPEL, see | |
# https://wiki.rockylinux.org/rocky/repo/#notes-on-epel. | |
dnf install -y epel-release | |
if [[ "\${VERSION_ID}" =~ 8\\.* ]]; then | |
dnf config-manager --set-enabled powertools | |
else | |
dnf config-manager --set-enabled crb | |
fi | |
dnf install -y binutils pkgconfig gcc gcc-c++ make glibc-static libstdc++-static openblas-static ccache curl git | |
if [[ "\${VERSION_ID}" =~ 8\\.* ]]; then | |
dnf install -y gcc-toolset-11 | |
source scl_source enable gcc-toolset-11 | |
fi | |
elif [ "\${ID}" = "centos" ]; then | |
# NB(thxCode): Patch for CentOS, see | |
# https://github.com/ROCm/ROCm-docker/blob/db86386c24eeb45f5d3ba73564b00cc66566e537/dev/Dockerfile-centos-7. | |
sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo \ | |
&& sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo \ | |
&& sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/*.repo | |
yum install -y centos-release-scl | |
sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo \ | |
&& sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo \ | |
&& sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/*.repo | |
yum update -y \ | |
&& yum install -y devtoolset-9 devtoolset-9-libatomic-devel devtoolset-9-elfutils-libelf-devel | |
export PATH="/opt/rh/devtoolset-9/root/usr/bin:\${PATH}" \ | |
&& export MANPATH="/opt/rh/devtoolset-9/root/usr/share/man:\${MANPATH}" \ | |
&& export INFOPATH="/opt/rh/devtoolset-9/root/usr/share/info:\${INFOPATH}" \ | |
&& export PKG_CONFIG_PATH="/opt/rh/devtoolset-9/root/usr/lib64/pkgconfig:\${PKG_CONFIG_PATH}" \ | |
&& export PCP_DIR="/opt/rh/devtoolset-9/root" \ | |
&& export PERL5LIB="/opt/rh/devtoolset-9/root/usr/lib64/perl5/vendor_perl:/opt/rh/devtoolset-9/root/usr/lib/perl5:/opt/rh/devtoolset-9/root/usr/share/perl5/" \ | |
&& export LD_LIBRARY_PATH="/opt/rh/devtoolset-9/root/usr/lib64:/opt/rh/devtoolset-9/root/usr/lib:/opt/rh/devtoolset-9/root/usr/lib64/dyninst:/opt/rh/devtoolset-9/root/usr/lib/dyninst:/opt/rh/devtoolset-9/root/usr/lib64:/opt/rh/devtoolset-9/root/usr/lib:\${LD_LIBRARY_PATH}" \ | |
&& export LDFLAGS="-Wl,-rpath=/opt/rh/devtoolset-9/root/usr/lib64 -Wl,-rpath=/opt/rh/devtoolset-9/root/usr/lib" | |
yum --enablerepo=extras install -y epel-release | |
yum install -y https://packages.endpointdev.com/rhel/\${VERSION_ID}/os/\$(uname -m)/endpoint-repo.\$(uname -m).rpm | |
yum update -y \ | |
&& yum install -y make glibc-static libstdc++-static openblas-static ccache curl git | |
cat <<EOL >/usr/lib64/pkgconfig/openblas.pc | |
Name: OpenBLAS | |
Description: OpenBLAS library | |
Version: 0.3.3 | |
Libs: -L/usr/lib64 -lopenblas | |
Cflags: -I/usr/include/openblas | |
EOL | |
else | |
echo "Unsupport distribution: \${ID}" | |
exit 1 | |
fi | |
else | |
echo "Unknown distribution" | |
exit 1 | |
fi | |
curl -L "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-\$(uname -m).tar.gz" | tar -zx -C /usr --strip-components 1 | |
git config --system --add safe.directory '*' | |
mkdir -p ${{ github.workspace }}/.cache | |
echo "===== BUILD =====" | |
cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \ | |
-DGGML_NATIVE=off \ | |
${{ matrix.instruction == 'avx2' && '-DGGML_AVX=on -DGGML_AVX2=on' || '' }} \ | |
${{ matrix.instruction == 'avx512' && '-DGGML_AVX512=on -DGGML_AVX512_VBMI=on -DGGML_AVX512_VNNI=on -DGGML_AVX512_BF16=on' || '' }} \ | |
${{ matrix.instruction == 'neon' && '-DGGML_CPU_ARM_ARCH="armv8.2-a"' || '' }} \ | |
-DGGML_BLAS_VENDOR=OpenBLAS \ | |
-DGGML_STATIC=on \ | |
-DGGML_BLAS=on \ | |
-DGGML_OPENMP=off \ | |
-DGGML_RPC=on | |
cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(nproc) | |
echo "===== RESULT =====" | |
ls -alh ${{ github.workspace }}/build/bin/ | |
if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then | |
ldd --version | |
ldd ${{ github.workspace }}/build/bin/llama-box || true | |
else | |
exit 1 | |
fi | |
EOF | |
chmod +x /tmp/entrypoint.sh | |
cat /tmp/entrypoint.sh | |
docker run \ | |
--rm \ | |
--privileged \ | |
--platform linux/${{ matrix.arch }} \ | |
--volume ${{ github.workspace }}:${{ github.workspace }} \ | |
--workdir ${{ github.workspace }} \ | |
--env DEBIAN_FRONTEND=noninteractive \ | |
--env CCACHE_DIR \ | |
--env LLAMA_BOX_BUILD_VERSION \ | |
--volume /tmp/entrypoint.sh:/entrypoint.sh \ | |
--entrypoint /entrypoint.sh \ | |
${{ matrix.distro_container_image }} | |
echo "===== PACKAGE =====" | |
mkdir -p ${{ github.workspace }}/out | |
zip -j ${{ github.workspace }}/out/llama-box-linux-${{ matrix.arch }}-${{ matrix.instruction }}.zip ${{ github.workspace }}/build/bin/llama-box | |
- name: Upload Artifact | |
uses: actions/upload-artifact@v4 | |
with: | |
path: ${{ github.workspace }}/out/*.zip | |
name: llama-box-linux-${{ matrix.arch }}-${{ matrix.instruction }} | |
linux-hip: | |
strategy: | |
fail-fast: false | |
matrix: | |
# see https://hub.docker.com/r/rocm/dev-centos-7/tags. | |
# 6.1 ==> 6.1.2, CentOS 7. | |
# build fat binary, | |
# see https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878, | |
# https://llvm.org/docs/AMDGPUUsage.html. | |
# official gpu support list, | |
# see https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.1.2/reference/system-requirements.html. | |
include: | |
- arch: 'amd64' | |
version: '6.1' | |
distro_container_image: 'rocm/dev-centos-7:6.1.2-complete' | |
hip_arch: 'gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx942;gfx1030;gfx1100;gfx1101;gfx1102' | |
runs-on: ubuntu-22.04 | |
steps: | |
- name: Maximize Docker Build Space | |
uses: gpustack/.github/.github/actions/maximize-docker-build-space@main | |
with: | |
deep-clean: false | |
root-reserve-mb: 20480 | |
- name: Checkout | |
uses: actions/checkout@v4 | |
with: | |
fetch-depth: 0 | |
submodules: 'recursive' | |
- name: Setup Cache | |
timeout-minutes: 5 | |
uses: actions/cache@v4 | |
with: | |
key: cache-linux-hip-${{ matrix.arch }}-${{ matrix.version }} | |
path: | | |
${{ github.workspace }}/.cache | |
- name: Setup QEMU | |
if: ${{ matrix.arch == 'arm64' }} | |
uses: docker/setup-qemu-action@v3 | |
with: | |
image: tonistiigi/binfmt:qemu-v8.1.5 | |
platforms: "arm64" | |
- name: Build | |
env: | |
CMAKE_VERSION: "3.22.1" | |
CCACHE_DIR: "${{ github.workspace }}/.cache/ccache" | |
AMDGPU_TARGETS: "${{ matrix.hip_arch }}" | |
run: | | |
echo "===== SCRIPT =====" | |
cat <<EOF > /tmp/entrypoint.sh | |
#!/bin/bash | |
if [ -f /etc/os-release ]; then | |
source /etc/os-release | |
cat /etc/os-release | |
if [ "\${ID}" = "centos" ]; then | |
# NB(thxCode): Patch for CentOS, see | |
# https://github.com/ROCm/ROCm-docker/blob/db86386c24eeb45f5d3ba73564b00cc66566e537/dev/Dockerfile-centos-7. | |
sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo \ | |
&& sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo \ | |
&& sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/*.repo | |
yum install -y centos-release-scl | |
sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo \ | |
&& sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo \ | |
&& sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/*.repo | |
yum update -y \ | |
&& yum install -y devtoolset-9 devtoolset-9-libatomic-devel devtoolset-9-elfutils-libelf-devel | |
export PATH="/opt/rh/devtoolset-9/root/usr/bin:\${PATH}" \ | |
&& export MANPATH="/opt/rh/devtoolset-9/root/usr/share/man:\${MANPATH}" \ | |
&& export INFOPATH="/opt/rh/devtoolset-9/root/usr/share/info:\${INFOPATH}" \ | |
&& export PKG_CONFIG_PATH="/opt/rh/devtoolset-9/root/usr/lib64/pkgconfig:\${PKG_CONFIG_PATH}" \ | |
&& export PCP_DIR="/opt/rh/devtoolset-9/root" \ | |
&& export PERL5LIB="/opt/rh/devtoolset-9/root/usr/lib64/perl5/vendor_perl:/opt/rh/devtoolset-9/root/usr/lib/perl5:/opt/rh/devtoolset-9/root/usr/share/perl5/" \ | |
&& export LD_LIBRARY_PATH="/opt/rh/devtoolset-9/root/usr/lib64:/opt/rh/devtoolset-9/root/usr/lib:/opt/rh/devtoolset-9/root/usr/lib64/dyninst:/opt/rh/devtoolset-9/root/usr/lib/dyninst:/opt/rh/devtoolset-9/root/usr/lib64:/opt/rh/devtoolset-9/root/usr/lib:\${LD_LIBRARY_PATH}" \ | |
&& export LDFLAGS="-Wl,-rpath=/opt/rh/devtoolset-9/root/usr/lib64 -Wl,-rpath=/opt/rh/devtoolset-9/root/usr/lib" | |
yum --enablerepo=extras install -y epel-release | |
yum install -y https://packages.endpointdev.com/rhel/\${VERSION_ID}/os/\$(uname -m)/endpoint-repo.\$(uname -m).rpm | |
yum update -y \ | |
&& yum install -y make glibc-static libstdc++-static ccache curl git | |
else | |
echo "Unsupport distribution: \${ID}" | |
exit 1 | |
fi | |
else | |
echo "Unknown distribution" | |
exit 1 | |
fi | |
curl -L "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-\$(uname -m).tar.gz" | tar -zx -C /usr --strip-components 1 | |
git config --system --add safe.directory '*' | |
mkdir -p ${{ github.workspace }}/.cache | |
echo "===== BUILD =====" | |
cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \ | |
-DGGML_HIP=on -DAMDGPU_TARGETS="${AMDGPU_TARGETS}" \ | |
-DCMAKE_HIP_COMPILER="\$(hipconfig -l)/clang" \ | |
-DGGML_NATIVE=off \ | |
-DGGML_CUDA_DMMV_X=4096 \ | |
-DGGML_CUDA_MMV_Y=256 \ | |
-DGGML_OPENMP=off \ | |
-DGGML_RPC=on | |
cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(nproc) | |
echo "===== RESULT =====" | |
ls -alh ${{ github.workspace }}/build/bin/ | |
if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then | |
ldd --version | |
ldd ${{ github.workspace }}/build/bin/llama-box || true | |
else | |
exit 1 | |
fi | |
EOF | |
chmod +x /tmp/entrypoint.sh | |
cat /tmp/entrypoint.sh | |
docker run \ | |
--rm \ | |
--privileged \ | |
--platform linux/${{ matrix.arch }} \ | |
--volume ${{ github.workspace }}:${{ github.workspace }} \ | |
--workdir ${{ github.workspace }} \ | |
--env DEBIAN_FRONTEND=noninteractive \ | |
--env CCACHE_DIR \ | |
--env AMDGPU_TARGETS \ | |
--env LLAMA_BOX_BUILD_VERSION \ | |
--volume /tmp/entrypoint.sh:/entrypoint.sh \ | |
--entrypoint /entrypoint.sh \ | |
${{ matrix.distro_container_image }} | |
echo "===== PACKAGE =====" | |
mkdir -p ${{ github.workspace }}/out | |
zip -j ${{ github.workspace }}/out/llama-box-linux-${{ matrix.arch }}-hip-${{ matrix.version }}.zip ${{ github.workspace }}/build/bin/llama-box | |
- name: Upload Artifact | |
uses: actions/upload-artifact@v4 | |
with: | |
path: ${{ github.workspace }}/out/*.zip | |
name: llama-box-linux-${{ matrix.arch }}-hip-${{ matrix.version }} | |
linux-cuda: | |
strategy: | |
fail-fast: false | |
matrix: | |
# see https://hub.docker.com/r/nvidia/cuda/tags?page=&page_size=&ordering=&name=devel. | |
# 12.4 ==> 12.4.0, CentOS 7. | |
# 11.8 ==> 11.8.0, CentOS 7 | |
# build fat binary, | |
# see https://developer.nvidia.com/cuda-gpus. | |
include: | |
- arch: 'amd64' | |
version: '12.4' | |
distro_container_image: 'nvidia/cuda:12.4.0-devel-centos7' | |
cuda_arch: '60-real;61-real;70-real;75-real;80-real;86-real;89-real;90-real' | |
- arch: 'amd64' | |
version: '11.8' | |
distro_container_image: 'nvidia/cuda:11.8.0-devel-centos7' | |
cuda_arch: '60-real;61-real;70-real;75-real;80-real;86-real;89-real;90-real' | |
runs-on: ubuntu-22.04 | |
steps: | |
- name: Maximize Docker Build Space | |
uses: gpustack/.github/.github/actions/maximize-docker-build-space@main | |
with: | |
deep-clean: false | |
root-reserve-mb: 20480 | |
- name: Checkout | |
uses: actions/checkout@v4 | |
with: | |
fetch-depth: 0 | |
submodules: 'recursive' | |
- name: Setup Cache | |
timeout-minutes: 5 | |
uses: actions/cache@v4 | |
with: | |
key: cache-linux-cuda-${{ matrix.arch }}-${{ matrix.version }} | |
path: | | |
${{ github.workspace }}/.cache | |
- name: Setup QEMU | |
if: ${{ matrix.arch == 'arm64' }} | |
uses: docker/setup-qemu-action@v3 | |
with: | |
image: tonistiigi/binfmt:qemu-v8.1.5 | |
platforms: "arm64" | |
- name: Build | |
env: | |
CMAKE_VERSION: "3.22.1" | |
CCACHE_DIR: "${{ github.workspace }}/.cache/ccache" | |
CUDA_ARCHITECTURES: "${{ matrix.cuda_arch }}" | |
run: | | |
echo "===== SCRIPT =====" | |
cat <<EOF > /tmp/entrypoint.sh | |
#!/bin/bash | |
if [ -f /etc/os-release ]; then | |
source /etc/os-release | |
cat /etc/os-release | |
if [ "\${ID}" = "centos" ]; then | |
# NB(thxCode): Patch for CentOS, see | |
# https://github.com/ROCm/ROCm-docker/blob/db86386c24eeb45f5d3ba73564b00cc66566e537/dev/Dockerfile-centos-7. | |
sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo \ | |
&& sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo \ | |
&& sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/*.repo | |
yum install -y centos-release-scl | |
sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo \ | |
&& sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo \ | |
&& sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/*.repo | |
yum update -y \ | |
&& yum install -y devtoolset-9 devtoolset-9-libatomic-devel devtoolset-9-elfutils-libelf-devel | |
export PATH="/opt/rh/devtoolset-9/root/usr/bin:\${PATH}" \ | |
&& export MANPATH="/opt/rh/devtoolset-9/root/usr/share/man:\${MANPATH}" \ | |
&& export INFOPATH="/opt/rh/devtoolset-9/root/usr/share/info:\${INFOPATH}" \ | |
&& export PKG_CONFIG_PATH="/opt/rh/devtoolset-9/root/usr/lib64/pkgconfig:\${PKG_CONFIG_PATH}" \ | |
&& export PCP_DIR="/opt/rh/devtoolset-9/root" \ | |
&& export PERL5LIB="/opt/rh/devtoolset-9/root/usr/lib64/perl5/vendor_perl:/opt/rh/devtoolset-9/root/usr/lib/perl5:/opt/rh/devtoolset-9/root/usr/share/perl5/" \ | |
&& export LD_LIBRARY_PATH="/opt/rh/devtoolset-9/root/usr/lib64:/opt/rh/devtoolset-9/root/usr/lib:/opt/rh/devtoolset-9/root/usr/lib64/dyninst:/opt/rh/devtoolset-9/root/usr/lib/dyninst:/opt/rh/devtoolset-9/root/usr/lib64:/opt/rh/devtoolset-9/root/usr/lib:\${LD_LIBRARY_PATH}" \ | |
&& export LDFLAGS="-Wl,-rpath=/opt/rh/devtoolset-9/root/usr/lib64 -Wl,-rpath=/opt/rh/devtoolset-9/root/usr/lib" | |
yum --enablerepo=extras install -y epel-release | |
yum install -y https://packages.endpointdev.com/rhel/\${VERSION_ID}/os/\$(uname -m)/endpoint-repo.\$(uname -m).rpm | |
yum update -y \ | |
&& yum install -y make glibc-static libstdc++-static ccache curl git | |
else | |
echo "Unsupport distribution: \${ID}" | |
exit 1 | |
fi | |
else | |
echo "Unknown distribution" | |
exit 1 | |
fi | |
curl -L "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-\$(uname -m).tar.gz" | tar -zx -C /usr --strip-components 1 | |
git config --system --add safe.directory '*' | |
mkdir -p ${{ github.workspace }}/.cache | |
echo "===== BUILD =====" | |
cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \ | |
-DGGML_CUDA=on -DGGML_CUDA_F16=on -DCMAKE_CUDA_ARCHITECTURES="${CUDA_ARCHITECTURES}" \ | |
-DGGML_NATIVE=off \ | |
-DGGML_CUDA_DMMV_X=4096 \ | |
-DGGML_CUDA_MMV_Y=256 \ | |
-DGGML_OPENMP=off \ | |
-DGGML_RPC=on | |
cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(nproc) | |
echo "===== RESULT =====" | |
ls -alh ${{ github.workspace }}/build/bin/ | |
if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then | |
ldd --version | |
ldd ${{ github.workspace }}/build/bin/llama-box || true | |
else | |
exit 1 | |
fi | |
EOF | |
chmod +x /tmp/entrypoint.sh | |
cat /tmp/entrypoint.sh | |
docker run \ | |
--rm \ | |
--privileged \ | |
--platform linux/${{ matrix.arch }} \ | |
--volume ${{ github.workspace }}:${{ github.workspace }} \ | |
--workdir ${{ github.workspace }} \ | |
--env DEBIAN_FRONTEND=noninteractive \ | |
--env CCACHE_DIR \ | |
--env CUDA_ARCHITECTURES \ | |
--env LLAMA_BOX_BUILD_VERSION \ | |
--volume /tmp/entrypoint.sh:/entrypoint.sh \ | |
--entrypoint /entrypoint.sh \ | |
${{ matrix.distro_container_image }} | |
echo "===== PACKAGE =====" | |
mkdir -p ${{ github.workspace }}/out | |
zip -j ${{ github.workspace }}/out/llama-box-linux-${{ matrix.arch }}-cuda-${{ matrix.version }}.zip ${{ github.workspace }}/build/bin/llama-box | |
- name: Upload Artifact | |
uses: actions/upload-artifact@v4 | |
with: | |
path: ${{ github.workspace }}/out/*.zip | |
name: llama-box-linux-${{ matrix.arch }}-cuda-${{ matrix.version }} | |
linux-oneapi: | |
strategy: | |
fail-fast: false | |
matrix: | |
# see https://hub.docker.com/r/intel/oneapi-basekit/tags?page=&page_size=&ordering=&name=devel. | |
# 2025.0 ==> 2025.0.0-0, Ubuntu 22.04. | |
include: | |
- arch: 'amd64' | |
version: '2025.0' | |
distro_container_image: 'intel/oneapi-basekit:2025.0.0-0-devel-ubuntu22.04' | |
runs-on: ubuntu-22.04 | |
steps: | |
- name: Maximize Docker Build Space | |
uses: gpustack/.github/.github/actions/maximize-docker-build-space@main | |
with: | |
deep-clean: false | |
root-reserve-mb: 20480 | |
- name: Checkout | |
uses: actions/checkout@v4 | |
with: | |
fetch-depth: 0 | |
submodules: 'recursive' | |
- name: Setup Cache | |
timeout-minutes: 5 | |
uses: actions/cache@v4 | |
with: | |
key: cache-linux-oneapi-${{ matrix.arch }}-${{ matrix.version }} | |
path: | | |
${{ github.workspace }}/.cache | |
- name: Setup QEMU | |
if: ${{ matrix.arch == 'arm64' }} | |
uses: docker/setup-qemu-action@v3 | |
with: | |
image: tonistiigi/binfmt:qemu-v8.1.5 | |
platforms: "arm64" | |
- name: Build | |
env: | |
CMAKE_VERSION: "3.22.1" | |
CCACHE_DIR: "${{ github.workspace }}/.cache/ccache" | |
run: | | |
echo "===== SCRIPT =====" | |
cat <<EOF > /tmp/entrypoint.sh | |
#!/bin/bash | |
if [ -f /etc/os-release ]; then | |
source /etc/os-release | |
cat /etc/os-release | |
if [ "\${ID}" = "ubuntu" ]; then | |
apt-get update -y \ | |
&& apt-get install -y build-essential ccache curl git bc | |
if (( \$(echo "\${VERSION_ID} < 21.04" | bc -l) )); then | |
apt-get update -y \ | |
&& apt-get install -y software-properties-common | |
add-apt-repository -y ppa:ubuntu-toolchain-r/test | |
apt-get update -y \ | |
&& apt-get install -y gcc-11 g++-11 | |
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 10 | |
update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 10 | |
fi | |
else | |
echo "Unsupport distribution: \${ID}" | |
exit 1 | |
fi | |
else | |
echo "Unknown distribution" | |
exit 1 | |
fi | |
curl -L "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-\$(uname -m).tar.gz" | tar -zx -C /usr --strip-components 1 | |
git config --system --add safe.directory '*' | |
mkdir -p ${{ github.workspace }}/.cache | |
echo "===== BUILD =====" | |
cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \ | |
-DGGML_SYCL=on -DGGML_SYCL_F16=on \ | |
-DGGML_NATIVE=off \ | |
-DGGML_OPENMP=off \ | |
-DGGML_RPC=on | |
cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(nproc) | |
echo "===== RESULT =====" | |
ls -alh ${{ github.workspace }}/build/bin/ | |
if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then | |
ldd --version | |
ldd ${{ github.workspace }}/build/bin/llama-box || true | |
else | |
exit 1 | |
fi | |
EOF | |
chmod +x /tmp/entrypoint.sh | |
cat /tmp/entrypoint.sh | |
docker run \ | |
--rm \ | |
--privileged \ | |
--platform linux/${{ matrix.arch }} \ | |
--volume ${{ github.workspace }}:${{ github.workspace }} \ | |
--workdir ${{ github.workspace }} \ | |
--env CC=icx \ | |
--env CXX=icpx \ | |
--env DEBIAN_FRONTEND=noninteractive \ | |
--env CCACHE_DIR \ | |
--env LLAMA_BOX_BUILD_VERSION \ | |
--volume /tmp/entrypoint.sh:/entrypoint.sh \ | |
--entrypoint /entrypoint.sh \ | |
${{ matrix.distro_container_image }} | |
echo "===== PACKAGE =====" | |
mkdir -p ${{ github.workspace }}/out | |
zip -j ${{ github.workspace }}/out/llama-box-linux-${{ matrix.arch }}-oneapi-${{ matrix.version }}.zip ${{ github.workspace }}/build/bin/llama-box | |
- name: Upload Artifact | |
uses: actions/upload-artifact@v4 | |
with: | |
path: ${{ github.workspace }}/out/*.zip | |
name: llama-box-linux-${{ matrix.arch }}-oneapi-${{ matrix.version }} | |
linux-cann: | |
strategy: | |
fail-fast: false | |
matrix: | |
# see https://hub.docker.com/r/ascendai/cann/tags?page=&page_size=&ordering=&name=8.0.rc2.alpha003-910b. | |
# 8.0 ==> 8.0.rc2.alpha003, Ubuntu 20.04, OpenEuler 20.03 | |
arch: | |
- 'amd64' | |
- 'arm64' | |
version: | |
- '8.0' | |
distro_container_image: | |
- 'ascendai/cann:8.0.rc2.alpha003-910b-openeuler20.03-py3.9' | |
- 'ascendai/cann:8.0.rc2.alpha003-910b-ubuntu20.04-py3.9' | |
- 'gpustack/ascendai-cann:8.0.RC2.alpha003-310p-openeuler20.03-py3.9' | |
- 'gpustack/ascendai-cann:8.0.RC2.alpha003-310p-ubuntu20.04-py3.9' | |
runs-on: ubuntu-22.04 | |
steps: | |
- name: Maximize Docker Build Space | |
uses: gpustack/.github/.github/actions/maximize-docker-build-space@main | |
with: | |
deep-clean: false | |
root-reserve-mb: 20480 | |
- name: Checkout | |
uses: actions/checkout@v4 | |
with: | |
fetch-depth: 0 | |
submodules: 'recursive' | |
- name: Setup Cache | |
timeout-minutes: 5 | |
uses: actions/cache@v4 | |
with: | |
key: cache-linux-cann-${{ matrix.arch }}-${{ matrix.version }}-${{ matrix.distro_container_image }}${{ contains(matrix.distro_container_image, '310p') && '-310p' || '' }} | |
path: | | |
${{ github.workspace }}/.cache | |
- name: Setup QEMU | |
if: ${{ matrix.arch == 'arm64' }} | |
uses: docker/setup-qemu-action@v3 | |
with: | |
image: tonistiigi/binfmt:qemu-v8.1.5 | |
platforms: "arm64" | |
- name: Build | |
env: | |
CMAKE_VERSION: "3.22.1" | |
CCACHE_DIR: "${{ github.workspace }}/.cache/ccache" | |
run: | | |
echo "===== SCRIPT =====" | |
cat <<EOF > /tmp/entrypoint.sh | |
#!/bin/bash | |
if [ -f /etc/os-release ]; then | |
source /etc/os-release | |
cat /etc/os-release | |
if [ "\${ID}" = "ubuntu" ]; then | |
apt-get update -y \ | |
&& apt-get install -y build-essential ccache curl git bc | |
if (( \$(echo "\${VERSION_ID} < 21.04" | bc -l) )); then | |
apt-get update -y \ | |
&& apt-get install -y software-properties-common | |
add-apt-repository -y ppa:ubuntu-toolchain-r/test | |
apt-get update -y \ | |
&& apt-get install -y gcc-11 g++-11 | |
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 10 | |
update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 10 | |
fi | |
elif [ "\${ID}" = "openEuler" ]; then | |
yum update -y \ | |
&& yum install -y gcc gcc-c++ gcc-toolset-10-gcc* | |
yum update -y \ | |
&& yum install -y gcc-toolset-10-libstdc++-static make ccache curl git | |
export PATH="/opt/openEuler/gcc-toolset-10/root/usr/bin/:\${PATH}" && \ | |
export LD_LIBRARY_PATH="/opt/openEuler/gcc-toolset-10/root/usr/lib64/:\${LD_LIBRARY_PATH}" | |
else | |
echo "Unsupport distribution: \${ID}" | |
exit 1 | |
fi | |
else | |
echo "Unknown distribution" | |
exit 1 | |
fi | |
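# Load the Ascend CANN toolkit environment (compiler and runtime paths needed for the GGML_CANN build). | |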
source /usr/local/Ascend/ascend-toolkit/set_env.sh | |
curl -L "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-\$(uname -m).tar.gz" | tar -zx -C /usr --strip-components 1 | |
git config --system --add safe.directory '*' | |
mkdir -p ${{ github.workspace }}/.cache | |
echo "===== BUILD =====" | |
cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \ | |
-DGGML_CANN=on \ | |
-DSOC_TYPE=${{ contains(matrix.distro_container_image, '910b') && 'Ascend910B3' || 'Ascend310P3' }} \ | |
-DGGML_NATIVE=off \ | |
${{ matrix.arch == 'arm64' && '-DGGML_CPU_ARM_ARCH="armv8.2-a"' || '' }} \ | |
${{ contains(matrix.distro_container_image, '310p') && '-DGGML_AVX2=off' || '' }} \ | |
-DGGML_OPENMP=off \ | |
-DGGML_RPC=on | |
cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(nproc) | |
echo "===== RESULT =====" | |
ls -alh ${{ github.workspace }}/build/bin/ | |
if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then | |
ldd --version | |
ldd ${{ github.workspace }}/build/bin/llama-box || true | |
else | |
exit 1 | |
fi | |
EOF | |
chmod +x /tmp/entrypoint.sh | |
cat /tmp/entrypoint.sh | |
docker run \ | |
--rm \ | |
--privileged \ | |
--platform linux/${{ matrix.arch }} \ | |
--volume ${{ github.workspace }}:${{ github.workspace }} \ | |
--workdir ${{ github.workspace }} \ | |
--env DEBIAN_FRONTEND=noninteractive \ | |
--env CCACHE_DIR \ | |
--env LLAMA_BOX_BUILD_VERSION \ | |
--volume /tmp/entrypoint.sh:/entrypoint.sh \ | |
--entrypoint /entrypoint.sh \ | |
${{ matrix.distro_container_image }} | |
echo "===== PACKAGE =====" | |
mkdir -p ${{ github.workspace }}/out | |
zip -j ${{ github.workspace }}/out/llama-box-linux-${{ matrix.arch }}-cann-${{ matrix.version }}${{ contains(matrix.distro_container_image, 'openeuler20.03') && '-openeuler20.03' || '' }}${{ contains(matrix.distro_container_image, '310p') && '-310p' || '' }}.zip ${{ github.workspace }}/build/bin/llama-box | |
- name: Upload Artifact | |
uses: actions/upload-artifact@v4 | |
with: | |
path: ${{ github.workspace }}/out/*.zip | |
name: llama-box-linux-${{ matrix.arch }}-cann-${{ matrix.version }}${{ contains(matrix.distro_container_image, 'openeuler20.03') && '-openeuler20.03' || '' }}${{ contains(matrix.distro_container_image, '310p') && '-310p' || '' }} | |
linux-musa: | |
strategy: | |
fail-fast: false | |
matrix: | |
# see https://hub.docker.com/r/mthreads/musa/tags?page_size=&ordering=&name=ubuntu20.04. | |
# rc3.1.0 ==> rc3.1.0, Ubuntu 22.04. | |
include: | |
- arch: 'amd64' | |
version: 'rc3.1' | |
distro_container_image: 'mthreads/musa:rc3.1.0-devel-ubuntu22.04' | |
runs-on: ubuntu-22.04 | |
steps: | |
- name: Maximize Docker Build Space | |
uses: gpustack/.github/.github/actions/maximize-docker-build-space@main | |
with: | |
deep-clean: false | |
root-reserve-mb: 20480 | |
- name: Checkout | |
uses: actions/checkout@v4 | |
with: | |
fetch-depth: 0 | |
submodules: 'recursive' | |
- name: Setup Cache | |
timeout-minutes: 5 | |
uses: actions/cache@v4 | |
with: | |
key: cache-linux-musa-${{ matrix.arch }}-${{ matrix.version }} | |
path: | | |
${{ github.workspace }}/.cache | |
- name: Setup QEMU | |
if: ${{ matrix.arch == 'arm64' }} | |
uses: docker/setup-qemu-action@v3 | |
with: | |
image: tonistiigi/binfmt:qemu-v8.1.5 | |
platforms: "arm64" | |
- name: Build | |
env: | |
CCACHE_DIR: "${{ github.workspace }}/.cache/ccache" | |
run: | | |
echo "===== SCRIPT =====" | |
cat <<EOF > /tmp/entrypoint.sh | |
#!/bin/bash | |
if [ -f /etc/os-release ]; then | |
source /etc/os-release | |
cat /etc/os-release | |
if [ "\${ID}" = "ubuntu" ]; then | |
apt-get update -y \ | |
&& apt-get install -y build-essential ccache curl git cmake | |
if (( \$(echo "\${VERSION_ID} < 21.04" | bc -l) )); then | |
apt-get update -y \ | |
&& apt-get install -y software-properties-common | |
add-apt-repository -y ppa:ubuntu-toolchain-r/test | |
apt-get update -y \ | |
&& apt-get install -y gcc-11 g++-11 | |
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 10 | |
update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 10 | |
fi | |
else | |
echo "Unsupport distribution: \$ID" | |
exit 1 | |
fi | |
else | |
echo "Unknown distribution" | |
exit 1 | |
fi | |
git config --system --add safe.directory '*' | |
mkdir -p ${{ github.workspace }}/.cache | |
echo "===== BUILD =====" | |
cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \ | |
-DGGML_MUSA=on \ | |
-DGGML_NATIVE=off \ | |
-DGGML_OPENMP=off \ | |
-DGGML_RPC=on | |
cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(nproc) | |
echo "===== RESULT =====" | |
ls -alh ${{ github.workspace }}/build/bin/ | |
if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then | |
ldd --version | |
ldd ${{ github.workspace }}/build/bin/llama-box || true | |
else | |
exit 1 | |
fi | |
EOF | |
chmod +x /tmp/entrypoint.sh | |
cat /tmp/entrypoint.sh | |
docker run \ | |
--rm \ | |
--privileged \ | |
--platform linux/${{ matrix.arch }} \ | |
--volume ${{ github.workspace }}:${{ github.workspace }} \ | |
--workdir ${{ github.workspace }} \ | |
--env DEBIAN_FRONTEND=noninteractive \ | |
--env CCACHE_DIR \ | |
--env LLAMA_BOX_BUILD_VERSION \ | |
--volume /tmp/entrypoint.sh:/entrypoint.sh \ | |
--entrypoint /entrypoint.sh \ | |
${{ matrix.distro_container_image }} | |
echo "===== PACKAGE =====" | |
mkdir -p ${{ github.workspace }}/out | |
zip -j ${{ github.workspace }}/out/llama-box-linux-${{ matrix.arch }}-musa-${{ matrix.version }}.zip ${{ github.workspace }}/build/bin/llama-box | |
- name: Upload Artifact | |
uses: actions/upload-artifact@v4 | |
with: | |
path: ${{ github.workspace }}/out/*.zip | |
name: llama-box-linux-${{ matrix.arch }}-musa-${{ matrix.version }} | |
windows: | |
strategy: | |
fail-fast: false | |
matrix: | |
# AVX2 ==> Windows Server 2022. | |
# AVX512 ==> Windows Server 2022. | |
# NEON ==> Windows Server 2022. | |
include: | |
- arch: 'amd64' | |
instruction: 'avx2' | |
- arch: 'amd64' | |
instruction: 'avx512' | |
- arch: 'arm64' | |
instruction: 'neon' | |
runs-on: windows-2022 | |
steps: | |
- name: Checkout | |
uses: actions/checkout@v4 | |
with: | |
fetch-depth: 0 | |
submodules: 'recursive' | |
- name: Setup Cache | |
# doesn't support ccache, | |
# see https://stackoverflow.com/questions/72829476/how-to-use-ccache-4-6-1-on-windows-msvc-with-cmake. | |
timeout-minutes: 5 | |
uses: actions/cache@v4 | |
with: | |
key: cache-windows-${{ matrix.arch }}-${{ matrix.instruction }} | |
path: | | |
${{ github.workspace }}\build | |
${{ github.workspace }}\.cache | |
- name: Deps | |
run: | | |
$ErrorActionPreference = "Stop" | |
$ProgressPreference = 'SilentlyContinue' | |
choco install ccache ninja curl -y | |
- name: Build | |
env: | |
CCACHE_DIR: "${{ github.workspace }}\\.cache\\ccache" | |
run: | | |
$ErrorActionPreference = "Stop" | |
$ProgressPreference = 'SilentlyContinue' | |
Write-Host "===== BUILD =====" | |
New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\.cache" -ErrorAction Ignore | Out-Null | |
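# arm64 cross-builds use the Ninja Multi-Config generator plus the repo's build-windows-arm64.cmake toolchain file; amd64 builds use the default Visual Studio generator. | |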
cmake ${{ matrix.arch == 'arm64' && '-G "Ninja Multi-Config"'|| '' }} -S ${{ github.workspace }} -B ${{ github.workspace }}\build -DCMAKE_BUILD_TYPE=Release ` | |
${{ matrix.arch == 'arm64' && format('-DCMAKE_TOOLCHAIN_FILE={0}\llama-box\scripts\build-windows-arm64.cmake', github.workspace) || '' }} ` | |
-DGGML_NATIVE=off ` | |
${{ matrix.instruction == 'avx2' && '-DGGML_AVX=on -DGGML_AVX2=on' || '' }} ` | |
${{ matrix.instruction == 'avx512' && '-DGGML_AVX512=on -DGGML_AVX512_VBMI=on -DGGML_AVX512_VNNI=on -DGGML_AVX512_BF16=on' || '' }} ` | |
-DGGML_STATIC=on ` | |
-DGGML_OPENMP=off ` | |
-DGGML_RPC=on | |
cmake --build ${{ github.workspace }}\build --target llama-box --config Release -- ${{ matrix.arch == 'arm64' && '-j ' || '/m:' }}${env:NUMBER_OF_PROCESSORS} | |
Write-Host "===== RESULT =====" | |
Get-ChildItem -Path "${{ github.workspace }}\build\bin\Release\" -File -ErrorAction Ignore | |
if (Test-Path -Path "${{ github.workspace }}\build\bin\Release\llama-box.exe") { | |
llvm-objdump.exe -p "${{ github.workspace }}\build\bin\Release\llama-box.exe" | |
} else { | |
exit 1 | |
} | |
Write-Host "===== PACKAGE =====" | |
New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\out" -ErrorAction Ignore | Out-Null | |
Compress-Archive -Path "${{ github.workspace }}\build\bin\Release\llama-box.exe" -DestinationPath "${{ github.workspace }}\out\llama-box-windows-${{ matrix.arch }}-${{ matrix.instruction }}.zip" | |
- name: Upload Artifact | |
uses: actions/upload-artifact@v4 | |
with: | |
path: ${{ github.workspace }}\\out\\*.zip | |
name: llama-box-windows-${{ matrix.arch }}-${{ matrix.instruction }} | |
windows-hip: | |
strategy: | |
fail-fast: false | |
matrix: | |
# see https://www.amd.com/en/developer/resources/rocm-hub/hip-sdk.html. | |
# 6.1 ==> 6.1.2, Windows Server 2022. | |
# build fat binary, | |
# see https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878, | |
# https://llvm.org/docs/AMDGPUUsage.html. | |
# official gpu support list, | |
# see https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.1.2/reference/system-requirements.html. | |
include: | |
- arch: 'amd64' | |
version: '6.1' | |
distro_binary_installer: 'https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe' | |
hip_arch: 'gfx1030;gfx1100;gfx1101;gfx1102' | |
runs-on: windows-2022 | |
steps: | |
- name: Checkout | |
uses: actions/checkout@v4 | |
with: | |
fetch-depth: 0 | |
submodules: 'recursive' | |
- name: Setup Cache | |
timeout-minutes: 5 | |
uses: actions/cache@v4 | |
with: | |
key: cache-windows-hip-${{ matrix.arch }}-${{ matrix.version }} | |
path: | | |
${{ github.workspace }}\.cache | |
- name: Deps | |
run: | | |
$ErrorActionPreference = "Stop" | |
$ProgressPreference = 'SilentlyContinue' | |
choco install ccache curl -y | |
- name: Setup HIP | |
run: | | |
$ErrorActionPreference = "Stop" | |
$ProgressPreference = 'SilentlyContinue' | |
Write-Host "I [$((Get-Date).ToString("yyyy-mm-dd HH:mm:ss"))] download AMD ROCm HIP SDK" | |
curl.exe --retry 5 --retry-delay 5 ` | |
--output "${{ runner.temp }}\installer.exe" ` | |
--url "${{ matrix.distro_binary_installer }}" | |
Write-Host "I [$((Get-Date).ToString("yyyy-mm-dd HH:mm:ss"))] install AMD ROCm HIP SDK" | |
Start-Process "${{ runner.temp }}\installer.exe" -NoNewWindow -Wait ` | |
-ArgumentList '-install' | |
Write-Host "I [$((Get-Date).ToString("yyyy-mm-dd HH:mm:ss"))] verify AMD ROCm HIP SDK" | |
& 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version | |
$hipPath = "$(Resolve-Path -Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | Split-Path | Split-Path)" | |
"HIP_PATH=${hipPath}" | Out-File -FilePath $env:GITHUB_ENV -Append | |
- name: Build | |
env: | |
CCACHE_DIR: "${{ github.workspace }}\\.cache\\ccache" | |
AMDGPU_TARGETS: "${{ matrix.hip_arch }}" | |
run: | | |
$ErrorActionPreference = "Stop" | |
$ProgressPreference = 'SilentlyContinue' | |
Write-Host "HIP_PATH=${env:HIP_PATH}" | |
Write-Host "===== BUILD =====" | |
New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\.cache" -ErrorAction Ignore | Out-Null | |
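# Drive the HIP SDK's clang/clang++ directly as the host compilers; CMAKE_PREFIX_PATH lets CMake locate the HIP packages under HIP_PATH. | |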
$env:CMAKE_PREFIX_PATH = "${env:HIP_PATH}" | |
cmake -G "Unix Makefiles" -S ${{ github.workspace }} -B ${{ github.workspace }}\build -DCMAKE_BUILD_TYPE=Release ` | |
-DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" ` | |
-DGGML_HIP=on -DAMDGPU_TARGETS="${env:AMDGPU_TARGETS}" ` | |
-DGGML_NATIVE=off ` | |
-DGGML_CUDA_DMMV_X=4096 ` | |
-DGGML_CUDA_MMV_Y=256 ` | |
-DGGML_OPENMP=off ` | |
-DGGML_RPC=on | |
cmake --build ${{ github.workspace }}\build --target llama-box --config Release -- -j ${env:NUMBER_OF_PROCESSORS} | |
Write-Host "===== RESULT =====" | |
Get-ChildItem -Path "${{ github.workspace }}\build\bin\" -File -ErrorAction Ignore | |
if (Test-Path -Path "${{ github.workspace }}\build\bin\llama-box.exe") { | |
llvm-objdump.exe -p "${{ github.workspace }}\build\bin\llama-box.exe" | |
} else { | |
exit 1 | |
} | |
Write-Host "===== PACKAGE =====" | |
New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\out" -ErrorAction Ignore | Out-Null | |
Compress-Archive -Path "${{ github.workspace }}\build\bin\llama-box.exe" -DestinationPath "${{ github.workspace }}\out\llama-box-windows-${{ matrix.arch }}-hip-${{ matrix.version }}.zip" | |
- name: Upload Artifact | |
uses: actions/upload-artifact@v4 | |
with: | |
path: ${{ github.workspace }}\\out\\*.zip | |
name: llama-box-windows-${{ matrix.arch }}-hip-${{ matrix.version }} | |
windows-cuda: | |
strategy: | |
fail-fast: false | |
matrix: | |
# see https://developer.nvidia.com/cuda-downloads?target_os=Windows&target_arch=x86_64&target_version=Server2022&target_type=exe_network. | |
# 12.4 ==> 12.4.1, Windows Server 2022. | |
# 11.8 ==> 11.8.0, Windows Server 2019. | |
# build fat binary, | |
# see https://developer.nvidia.com/cuda-gpus. | |
include: | |
- arch: 'amd64' | |
version: '12.4' | |
distro_binary_installer: 'https://developer.download.nvidia.com/compute/cuda/12.4.1/network_installers/cuda_12.4.1_windows_network.exe' | |
cuda_arch: '60-real;61-real;70-real;75-real;80-real;86-real;89-real;90-real' | |
- arch: 'amd64' | |
version: '11.8' | |
distro_binary_installer: 'https://developer.download.nvidia.com/compute/cuda/11.8.0/network_installers/cuda_11.8.0_windows_network.exe' | |
cuda_arch: '60-real;61-real;70-real;75-real;80-real;86-real;89-real;90-real' | |
runs-on: ${{ matrix.version == '11.8' && 'windows-2019' || 'windows-2022' }} | |
steps: | |
- name: Checkout | |
uses: actions/checkout@v4 | |
with: | |
fetch-depth: 0 | |
submodules: 'recursive' | |
- name: Setup Cache | |
timeout-minutes: 5 | |
uses: actions/cache@v4 | |
with: | |
key: cache-windows-cuda-${{ matrix.arch }}-${{ matrix.version }} | |
path: | | |
${{ github.workspace }}\.cache | |
- name: Deps | |
run: | | |
$ErrorActionPreference = "Stop" | |
$ProgressPreference = 'SilentlyContinue' | |
choco install ccache ninja curl -y | |
- name: Setup CUDA | |
run: | | |
$ErrorActionPreference = "Stop" | |
$ProgressPreference = 'SilentlyContinue' | |
Write-Host "I [$((Get-Date).ToString("yyyy-mm-dd HH:mm:ss"))] download NVIDIA CUDA SDK" | |
curl.exe --retry 5 --retry-delay 5 ` | |
--output "${{ runner.temp }}\installer.exe" ` | |
--url "${{ matrix.distro_binary_installer }}" | |
Write-Host "I [$((Get-Date).ToString("yyyy-mm-dd HH:mm:ss"))] install NVIDIA CUDA SDK" | |
Start-Process "${{ runner.temp }}\installer.exe" -NoNewWindow -Wait ` | |
-ArgumentList '-s','nvcc_${{ matrix.version }}','cudart_${{ matrix.version }}','cublas_${{ matrix.version }}','cublas_dev_${{ matrix.version }}','thrust_${{ matrix.version }}','visual_studio_integration_${{ matrix.version }}' | |
Write-Host "I [$((Get-Date).ToString("yyyy-mm-dd HH:mm:ss"))] verify NVIDIA CUDA SDK" | |
& 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v*\bin\nvcc.exe' --version | |
$cudaPath = "$(Resolve-Path -Path 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v*\bin\nvcc.exe' | Split-Path | Split-Path)" | |
$cudaVersion=($cudaPath | Split-Path -Leaf ) -replace 'v(\d+).(\d+)', '$1_$2' | |
"CUDA_PATH=${cudaPath}" | Out-File -FilePath $env:GITHUB_ENV -Append | |
"CUDA_PATH_V${cudaVersion}=$cudaPath" | Out-File -FilePath $env:GITHUB_ENV -Append | |
"CUDA_PATH_VX_Y=CUDA_PATH_V${cudaVersion}" | Out-File -FilePath $env:GITHUB_ENV -Append | |
if (Test-Path -Path "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat") { | |
cmd /c 'call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" ${{ matrix.arch == 'amd64' && 'amd64' || 'amd64_arm64' }} && set' | ForEach-Object { ` | |
if ($_ -Match '^(.*?)=(.*)$') { $_ | Out-File -FilePath $env:GITHUB_ENV -Append } ` | |
} | |
} else { | |
cmd /c 'call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" ${{ matrix.arch == 'amd64' && 'amd64' || 'amd64_arm64' }} && set' | ForEach-Object { ` | |
if ($_ -Match '^(.*?)=(.*)$') { $_ | Out-File -FilePath $env:GITHUB_ENV -Append } ` | |
} | |
} | |
- name: Build | |
env: | |
CCACHE_DIR: "${{ github.workspace }}\\.cache\\ccache" | |
CUDA_ARCHITECTURES: "${{ matrix.cuda_arch }}" | |
run: | | |
$ErrorActionPreference = "Stop" | |
$ProgressPreference = 'SilentlyContinue' | |
Write-Host "CUDA_PATH=${env:CUDA_PATH}" | |
Write-Host "===== BUILD =====" | |
New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\.cache" -ErrorAction Ignore | Out-Null | |
cmake -G "Ninja" -S ${{ github.workspace }} -B ${{ github.workspace }}\build -DCMAKE_BUILD_TYPE=Release ` | |
-DGGML_CUDA=on -DGGML_CUDA_F16=on -DCMAKE_CUDA_ARCHITECTURES="${env:CUDA_ARCHITECTURES}" ` | |
-DGGML_NATIVE=off ` | |
-DGGML_CUDA_DMMV_X=4096 ` | |
-DGGML_CUDA_MMV_Y=256 ` | |
-DGGML_OPENMP=off ` | |
-DGGML_RPC=on | |
cmake --build ${{ github.workspace }}\build --target ggml --config Release -- -j $((${env:NUMBER_OF_PROCESSORS} - 1)) | |
cmake --build ${{ github.workspace }}\build --target llama-box --config Release -- -j ${env:NUMBER_OF_PROCESSORS} | |
Write-Host "===== RESULT =====" | |
Get-ChildItem -Path "${{ github.workspace }}\build\bin\" -File -ErrorAction Ignore | |
if (Test-Path -Path "${{ github.workspace }}\build\bin\llama-box.exe") { | |
llvm-objdump.exe -p "${{ github.workspace }}\build\bin\llama-box.exe" | |
} else { | |
exit 1 | |
} | |
Write-Host "===== PACKAGE =====" | |
New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\out" -ErrorAction Ignore | Out-Null | |
Compress-Archive -Path "${{ github.workspace }}\build\bin\llama-box.exe" -DestinationPath "${{ github.workspace }}\out\llama-box-windows-${{ matrix.arch }}-cuda-${{ matrix.version }}.zip" | |
- name: Upload Artifact | |
uses: actions/upload-artifact@v4 | |
with: | |
path: ${{ github.workspace }}\\out\\*.zip | |
name: llama-box-windows-${{ matrix.arch }}-cuda-${{ matrix.version }} | |
windows-oneapi: | |
strategy: | |
fail-fast: false | |
matrix: | |
# see https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit-download.html?operatingsystem=windows&windows-install-type=online. | |
# 2025.0 ==> 2025.0.0, Windows Server 2022. | |
include: | |
- arch: 'amd64' | |
version: '2025.0' | |
distro_binary_installer: 'https://registrationcenter-download.intel.com/akdlm/IRC_NAS/b380d914-366b-4b77-a74a-05e3c38b3514/intel-oneapi-base-toolkit-2025.0.0.882.exe' | |
runs-on: windows-2022 | |
steps: | |
- name: Checkout | |
uses: actions/checkout@v4 | |
with: | |
fetch-depth: 0 | |
submodules: 'recursive' | |
- name: Setup Cache | |
# doesn't support ccache, | |
# as oneAPI needs to configure the environment variables via setvars.bat. | |
timeout-minutes: 5 | |
uses: actions/cache@v4 | |
with: | |
key: cache-windows-oneapi-${{ matrix.arch }}-${{ matrix.version }} | |
path: | | |
${{ github.workspace }}\build | |
- name: Deps | |
run: | | |
$ErrorActionPreference = "Stop" | |
$ProgressPreference = 'SilentlyContinue' | |
choco install ninja curl -y | |
- name: Setup oneAPI | |
run: | | |
$ErrorActionPreference = "Stop" | |
$ProgressPreference = 'SilentlyContinue' | |
Write-Host "I [$((Get-Date).ToString("yyyy-mm-dd HH:mm:ss"))] download Intel oneAPI SDK" | |
curl.exe --retry 5 --retry-delay 5 ` | |
--output "${{ runner.temp }}\installer.exe" ` | |
--url "${{ matrix.distro_binary_installer }}" | |
Write-Host "I [$((Get-Date).ToString("yyyy-mm-dd HH:mm:ss"))] install Intel oneAPI SDK" | |
Start-Process "${{ runner.temp }}\installer.exe" -NoNewWindow -Wait ` | |
-ArgumentList '-s','--action=install','--components=intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel:intel.oneapi.win.dnnl:intel.oneapi.win.tbb.devel','--eula=accept','-p=NEED_VS2017_INTEGRATION=0','-p=NEED_VS2019_INTEGRATION=0','-p=NEED_VS2022_INTEGRATION=0' | |
Write-Host "I [$((Get-Date).ToString("yyyy-mm-dd HH:mm:ss"))] verify Intel oneAPI SDK" | |
& 'C:\Program Files (x86)\Intel\oneAPI\*\bin\icx.exe' --version | |
$oneapiPath = "$(Resolve-Path -Path 'C:\Program Files (x86)\Intel\oneAPI\*\bin\icx.exe' | Split-Path | Split-Path)" | |
"ONEAPI_PATH=${oneapiPath}" | Out-File -FilePath $env:GITHUB_ENV -Append | |
$oneapiRoot = "$(Split-Path -Path $oneapiPath)" | |
"ONEAPI_ROOT=${oneapiRoot}" | Out-File -FilePath $env:GITHUB_ENV -Append | |
cmd /c "call `"${oneapiRoot}\setvars.bat`" && set" | ForEach-Object { ` | |
if ($_ -Match '^(.*?)=(.*)$') { $_ | Out-File -FilePath $env:GITHUB_ENV -Append } ` | |
} | |
- name: Build | |
run: | | |
$ErrorActionPreference = "Stop" | |
$ProgressPreference = 'SilentlyContinue' | |
Write-Host "ONEAPI_PATH=${env:ONEAPI_PATH}" | |
Write-Host "ONEAPI_ROOT=${env:ONEAPI_ROOT}" | |
Write-Host "===== BUILD =====" | |
cmake -G "Ninja" -S ${{ github.workspace }} -B ${{ github.workspace }}\build -DCMAKE_BUILD_TYPE=Release ` | |
-DCMAKE_C_COMPILER=cl -DCMAKE_CXX_COMPILER=icx ` | |
-DGGML_SYCL=on -DGGML_SYCL_F16=on ` | |
-DGGML_NATIVE=off ` | |
-DGGML_OPENMP=off ` | |
-DGGML_RPC=on | |
cmake --build ${{ github.workspace }}\build --target ggml --config Release -- -j $((${env:NUMBER_OF_PROCESSORS} - 1)) | |
cmake --build ${{ github.workspace }}\build --target llama-box --config Release -- -j ${env:NUMBER_OF_PROCESSORS} | |
Write-Host "===== RESULT =====" | |
Get-ChildItem -Path "${{ github.workspace }}\build\bin\" -File -ErrorAction Ignore | |
if (Test-Path -Path "${{ github.workspace }}\build\bin\llama-box.exe") { | |
llvm-objdump.exe -p "${{ github.workspace }}\build\bin\llama-box.exe" | |
} else { | |
exit 1 | |
} | |
Write-Host "===== PACKAGE =====" | |
New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\out" -ErrorAction Ignore | Out-Null | |
Compress-Archive -Path "${{ github.workspace }}\build\bin\llama-box.exe" -DestinationPath "${{ github.workspace }}\out\llama-box-windows-${{ matrix.arch }}-oneapi-${{ matrix.version }}.zip" | |
- name: Upload Artifact | |
uses: actions/upload-artifact@v4 | |
with: | |
path: ${{ github.workspace }}\\out\\*.zip | |
name: llama-box-windows-${{ matrix.arch }}-oneapi-${{ matrix.version }} | |
release: | |
if: ${{ startsWith(github.ref, 'refs/tags/') }} | |
permissions: | |
contents: write | |
actions: read | |
id-token: write | |
runs-on: ubuntu-22.04 | |
needs: | |
- darwin | |
- darwin-metal | |
- linux | |
- linux-hip | |
- linux-cuda | |
- linux-oneapi | |
- linux-cann | |
- linux-musa | |
- windows | |
- windows-hip | |
- windows-cuda | |
- windows-oneapi | |
steps: | |
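# Collect every per-platform zip uploaded above into one directory and attach them all to the GitHub release. | |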
- name: Download Artifact | |
uses: actions/download-artifact@v4 | |
with: | |
path: ${{ github.workspace }}/out | |
merge-multiple: true | |
- name: Release | |
uses: softprops/action-gh-release@v1 | |
with: | |
fail_on_unmatched_files: true | |
tag_name: "${{ env.VERSION }}" | |
prerelease: ${{ contains(github.ref, 'rc') }} | |
files: ${{ github.workspace }}/out/* |