# fix: embedding crashing #601
#
# Workflow file for this run

	name: ci

	# The workflow token is limited to read-only scopes.
	permissions:
	  contents: read
	  pull-requests: read
	  actions: read

	# Exposed to every job; the Linux jobs forward it into their build containers.
	env:
	  LLAMA_BOX_BUILD_VERSION: "${{ github.ref_name }}"

	on:
	  workflow_dispatch: {}
	  push:
	    # Release tags such as v1.2.3.
	    tags:
	      - "v*.*.*"
	    # main plus maintenance branches such as branch-v1.2.
	    branches:
	      - "main"
	      - "branch-v*.*"
	    # Documentation and image changes do not trigger a build.
	    paths-ignore:
	      - "docs/**"
	      - "**.md"
	      - "**.mdx"
	      - "**.png"
	      - "**.jpg"
	      - "!.github/workflows/ci.yml"
	  pull_request:
	    branches:
	      - "main"
	    paths-ignore:
	      - "docs/**"
	      - "**.md"
	      - "**.mdx"
	      - "**.png"
	      - "**.jpg"
	      - "!.github/workflows/ci.yml"

	# Pull-request runs (head_ref set) share one group per ref, so a newer push
	# cancels the older run; all other runs get a unique group via run_id.
	concurrency:
	  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
	  cancel-in-progress: true


	# Disable OpenMP,
	# see https://github.com/ggerganov/llama.cpp/issues/7743#issuecomment-2148342691,
	# https://github.com/ggerganov/llama.cpp/issues/7719#issuecomment-2147631216.
	jobs:

	darwin:
	  # CPU-only macOS build: Accelerate on, Metal off. Produces one zip artifact.
	  strategy:
	    fail-fast: false
	    matrix:
	      include:
	        - arch: 'amd64'
	          instruction: 'avx2'
	  # amd64 runs on macos-13; any other arch falls through to macos-14.
	  runs-on: ${{ matrix.arch == 'amd64' && 'macos-13' || 'macos-14' }}
	  steps:
	    - name: Checkout
	      uses: actions/checkout@v4
	      with:
	        fetch-depth: 0
	        submodules: 'recursive'
	    - name: Setup Cache
	      timeout-minutes: 5
	      uses: actions/cache@v4
	      with:
	        key: cache-darwin-${{ matrix.arch }}-${{ matrix.instruction }}
	        path: |
	          ${{ github.workspace }}/.cache
	    - name: Deps
	      run: |
	        brew update && brew install ccache
	    - name: Setup XCode
	      uses: maxim-lobanov/setup-xcode@v1
	      with:
	        xcode-version: '15.2'
	    - name: Build
	      env:
	        # Lives under the cached .cache directory restored by "Setup Cache".
	        CCACHE_DIR: "${{ github.workspace }}/.cache/ccache"
	      run: |
	        echo "===== BUILD ====="
	        mkdir -p ${{ github.workspace }}/.cache
	        cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \
	          -DGGML_ACCELERATE=on -DGGML_METAL=off \
	          -DGGML_NATIVE=on \
	          -DGGML_OPENMP=off \
	          -DGGML_RPC=on
	        # nproc is a GNU coreutils tool and is not part of stock macOS; use sysctl.
	        cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(sysctl -n hw.logicalcpu)

	        echo "===== RESULT ====="
	        ls -alh ${{ github.workspace }}/build/bin/
	        # Fail the step when the binary is missing; the otool listing is best-effort.
	        if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then
	          otool --version
	          otool -L ${{ github.workspace }}/build/bin/llama-box || true
	        else
	          exit 1
	        fi

	        echo "===== PACKAGE ====="
	        mkdir -p ${{ github.workspace }}/out
	        zip -j ${{ github.workspace }}/out/llama-box-darwin-${{ matrix.arch }}-${{ matrix.instruction }}.zip ${{ github.workspace }}/build/bin/llama-box
	    - name: Upload Artifact
	      uses: actions/upload-artifact@v4
	      with:
	        path: ${{ github.workspace }}/out/*.zip
	        name: llama-box-darwin-${{ matrix.arch }}-${{ matrix.instruction }}

	darwin-metal:
	  # macOS build with Metal enabled and the Metal library embedded in the binary.
	  strategy:
	    fail-fast: false
	    matrix:
	      arch:
	        - 'amd64'
	        - 'arm64'
	      version:
	        - '3.0'
	  # see https://github.com/actions/runner-images?tab=readme-ov-file#available-images,
	  #     https://support.apple.com/en-us/102894.
	  # amd64 runs on macos-13; arm64 runs on macos-14.
	  runs-on: ${{ matrix.arch == 'amd64' && 'macos-13' || 'macos-14' }}
	  steps:
	    - name: Checkout
	      uses: actions/checkout@v4
	      with:
	        fetch-depth: 0
	        submodules: 'recursive'
	    - name: Setup Cache
	      timeout-minutes: 5
	      uses: actions/cache@v4
	      with:
	        key: cache-darwin-metal-${{ matrix.arch }}-${{ matrix.version }}
	        path: |
	          ${{ github.workspace }}/.cache
	    - name: Deps
	      run: |
	        brew update && brew install ccache
	    - name: Setup XCode
	      uses: maxim-lobanov/setup-xcode@v1
	      with:
	        xcode-version: '15.2'
	    - name: Build
	      env:
	        # Lives under the cached .cache directory restored by "Setup Cache".
	        CCACHE_DIR: "${{ github.workspace }}/.cache/ccache"
	      run: |
	        echo "===== BUILD ====="
	        mkdir -p ${{ github.workspace }}/.cache
	        cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \
	          -DGGML_ACCELERATE=on -DGGML_METAL=on -DGGML_METAL_USE_BF16=on -DGGML_METAL_EMBED_LIBRARY=on \
	          -DGGML_NATIVE=on \
	          -DGGML_OPENMP=off \
	          -DGGML_RPC=on
	        # nproc is a GNU coreutils tool and is not part of stock macOS; use sysctl.
	        cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(sysctl -n hw.logicalcpu)

	        echo "===== RESULT ====="
	        ls -alh ${{ github.workspace }}/build/bin/
	        # Fail the step when the binary is missing; the otool listing is best-effort.
	        if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then
	          otool --version
	          otool -L ${{ github.workspace }}/build/bin/llama-box || true
	        else
	          exit 1
	        fi

	        echo "===== PACKAGE ====="
	        mkdir -p ${{ github.workspace }}/out
	        zip -j ${{ github.workspace }}/out/llama-box-darwin-${{ matrix.arch }}-metal.zip ${{ github.workspace }}/build/bin/llama-box
	    - name: Upload Artifact
	      uses: actions/upload-artifact@v4
	      with:
	        path: ${{ github.workspace }}/out/*.zip
	        name: llama-box-darwin-${{ matrix.arch }}-metal-${{ matrix.version }}

	linux:
	  # CPU-only Linux build, statically linked against OpenBLAS, compiled inside a
	  # distro container so the binary targets that distro's userland.
	  strategy:
	    fail-fast: false
	    matrix:
	      # AVX2 ==> CentOS 7.
	      # AVX512 ==> RockyLinux 8.9.
	      # NEON ==> Ubuntu 18.04.
	      include:
	        - arch: 'amd64'
	          instruction: 'avx2'
	          distro_container_image: 'centos:7'
	        - arch: 'amd64'
	          instruction: 'avx512'
	          distro_container_image: 'rockylinux:8.9'
	        - arch: 'arm64'
	          instruction: 'neon'
	          distro_container_image: 'ubuntu:18.04'
	  runs-on: ubuntu-22.04
	  steps:
	    - name: Maximize Docker Build Space
	      uses: gpustack/.github/.github/actions/maximize-docker-build-space@main
	      with:
	        deep-clean: false
	        root-reserve-mb: 20480
	    - name: Checkout
	      uses: actions/checkout@v4
	      with:
	        fetch-depth: 0
	        submodules: 'recursive'
	    - name: Setup Cache
	      timeout-minutes: 5
	      uses: actions/cache@v4
	      with:
	        key: cache-linux-${{ matrix.arch }}-${{ matrix.instruction }}
	        path: |
	          ${{ github.workspace }}/.cache
	    # Only the arm64 entry needs emulation on the amd64 runner.
	    - name: Setup QEMU
	      if: ${{ matrix.arch == 'arm64' }}
	      uses: docker/setup-qemu-action@v3
	      with:
	        image: tonistiigi/binfmt:qemu-v8.1.5
	        platforms: "arm64"
	    # The script below is written with an unquoted heredoc: unescaped ${VAR} and
	    # $(cmd) are expanded on the runner while the file is generated, whereas
	    # \${VAR} and \$(cmd) are left for the container to evaluate.
	    - name: Build
	      env:
	        CMAKE_VERSION: "3.22.1"
	        CCACHE_DIR: "${{ github.workspace }}/.cache/ccache"
	      run: |
	        echo "===== SCRIPT ====="
	        cat <<EOF > /tmp/entrypoint.sh
	        #!/bin/bash
	        if [ -f /etc/os-release ]; then
	          source /etc/os-release
	          cat /etc/os-release
	          if [ "\${ID}" = "ubuntu" ]; then
	            apt-get update -y \
	              && apt-get install -y binutils pkg-config build-essential libopenblas-dev ccache curl git bc
	            if (( \$(echo "\${VERSION_ID} < 21.04" | bc -l) )); then
	              apt-get update -y \
	                && apt-get install -y software-properties-common
	              add-apt-repository -y ppa:ubuntu-toolchain-r/test
	              apt-get update -y \
	                && apt-get install -y gcc-11 g++-11
	              update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 10
	              update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 10
	            fi
	          elif [ "\${ID}" = "rocky" ]; then
	            # NB(thxCode): Enbale EPEL, see
	            # https://wiki.rockylinux.org/rocky/repo/#notes-on-epel.
	            dnf install -y epel-release
	            if [[ "\${VERSION_ID}" =~ 8\\.* ]]; then
	              dnf config-manager --set-enabled powertools
	            else
	              dnf config-manager --set-enabled crb
	            fi
	            dnf install -y binutils pkgconfig gcc gcc-c++ make glibc-static libstdc++-static openblas-static ccache curl git
	            if [[ "\${VERSION_ID}" =~ 8\\.* ]]; then
	              dnf install -y gcc-toolset-11
	              source scl_source enable gcc-toolset-11
	            fi
	          elif [ "\${ID}" = "centos" ]; then
	            # NB(thxCode): Patch for CentOS, see
	            # https://github.com/ROCm/ROCm-docker/blob/db86386c24eeb45f5d3ba73564b00cc66566e537/dev/Dockerfile-centos-7.
	            sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo \
	              && sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo \
	              && sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/*.repo
	            yum install -y centos-release-scl
	            sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo \
	              && sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo \
	              && sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/*.repo
	            yum update -y \
	              && yum install -y devtoolset-9 devtoolset-9-libatomic-devel devtoolset-9-elfutils-libelf-devel
	            export PATH="/opt/rh/devtoolset-9/root/usr/bin:\${PATH}" \
	              && export MANPATH="/opt/rh/devtoolset-9/root/usr/share/man:\${MANPATH}" \
	              && export INFOPATH="/opt/rh/devtoolset-9/root/usr/share/info:\${INFOPATH}" \
	              && export PKG_CONFIG_PATH="/opt/rh/devtoolset-9/root/usr/lib64/pkgconfig:\${PKG_CONFIG_PATH}" \
	              && export PCP_DIR="/opt/rh/devtoolset-9/root" \
	              && export PERL5LIB="/opt/rh/devtoolset-9/root/usr/lib64/perl5/vendor_perl:/opt/rh/devtoolset-9/root/usr/lib/perl5:/opt/rh/devtoolset-9/root/usr/share/perl5/" \
	              && export LD_LIBRARY_PATH="/opt/rh/devtoolset-9/root/usr/lib64:/opt/rh/devtoolset-9/root/usr/lib:/opt/rh/devtoolset-9/root/usr/lib64/dyninst:/opt/rh/devtoolset-9/root/usr/lib/dyninst:/opt/rh/devtoolset-9/root/usr/lib64:/opt/rh/devtoolset-9/root/usr/lib:\${LD_LIBRARY_PATH}" \
	              && export LDFLAGS="-Wl,-rpath=/opt/rh/devtoolset-9/root/usr/lib64 -Wl,-rpath=/opt/rh/devtoolset-9/root/usr/lib"
	            yum --enablerepo=extras install -y epel-release
	            yum install -y https://packages.endpointdev.com/rhel/\${VERSION_ID}/os/\$(uname -m)/endpoint-repo.\$(uname -m).rpm
	            yum update -y \
	              && yum install -y make glibc-static libstdc++-static openblas-static ccache curl git
	            cat <<EOL >/usr/lib64/pkgconfig/openblas.pc
	        Name: OpenBLAS
	        Description: OpenBLAS library
	        Version: 0.3.3
	        Libs: -L/usr/lib64 -lopenblas
	        Cflags: -I/usr/include/openblas
	        EOL
	          else
	            echo "Unsupport distribution: \${ID}"
	            exit 1
	          fi
	        else
	          echo "Unknown distribution"
	          exit 1
	        fi
	        curl -L "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-\$(uname -m).tar.gz" | tar -zx -C /usr --strip-components 1
	        git config --system --add safe.directory '*'
	        mkdir -p ${{ github.workspace }}/.cache
	        echo "===== BUILD ====="
	        cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \
	          -DGGML_NATIVE=off \
	          ${{ matrix.instruction == 'avx2' && '-DGGML_AVX=on -DGGML_AVX2=on' || '' }} \
	          ${{ matrix.instruction == 'avx512' && '-DGGML_AVX512=on -DGGML_AVX512_VBMI=on -DGGML_AVX512_VNNI=on -DGGML_AVX512_BF16=on' || '' }} \
	          ${{ matrix.instruction == 'neon' && '-DGGML_CPU_ARM_ARCH="armv8.2-a"' || '' }} \
	          -DGGML_BLAS_VENDOR=OpenBLAS \
	          -DGGML_STATIC=on \
	          -DGGML_BLAS=on \
	          -DGGML_OPENMP=off \
	          -DGGML_RPC=on
	        cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(nproc)
	        echo "===== RESULT ====="
	        ls -alh ${{ github.workspace }}/build/bin/
	        if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then
	          ldd --version
	          ldd ${{ github.workspace }}/build/bin/llama-box || true
	        else
	          exit 1
	        fi
	        EOF
	        chmod +x /tmp/entrypoint.sh
	        cat /tmp/entrypoint.sh

	        docker run \
	          --rm \
	          --privileged \
	          --platform linux/${{ matrix.arch }} \
	          --volume ${{ github.workspace }}:${{ github.workspace }} \
	          --workdir ${{ github.workspace }} \
	          --env DEBIAN_FRONTEND=noninteractive \
	          --env CCACHE_DIR \
	          --env LLAMA_BOX_BUILD_VERSION \
	          --volume /tmp/entrypoint.sh:/entrypoint.sh \
	          --entrypoint /entrypoint.sh \
	          ${{ matrix.distro_container_image }}

	        echo "===== PACKAGE ====="
	        mkdir -p ${{ github.workspace }}/out
	        zip -j ${{ github.workspace }}/out/llama-box-linux-${{ matrix.arch }}-${{ matrix.instruction }}.zip ${{ github.workspace }}/build/bin/llama-box
	    - name: Upload Artifact
	      uses: actions/upload-artifact@v4
	      with:
	        path: ${{ github.workspace }}/out/*.zip
	        name: llama-box-linux-${{ matrix.arch }}-${{ matrix.instruction }}

	linux-hip:
	  # ROCm/HIP build compiled inside the ROCm CentOS 7 development image.
	  strategy:
	    fail-fast: false
	    matrix:
	      # see https://hub.docker.com/r/rocm/dev-centos-7/tags.
	      # 6.1 ==> 6.1.2, CentOS 7.
	      # build fat binary,
	      # see https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878,
	      #     https://llvm.org/docs/AMDGPUUsage.html.
	      # official gpu support list,
	      # see https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.1.2/reference/system-requirements.html.
	      include:
	        - arch: 'amd64'
	          version: '6.1'
	          distro_container_image: 'rocm/dev-centos-7:6.1.2-complete'
	          hip_arch: 'gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx942;gfx1030;gfx1100;gfx1101;gfx1102'
	  runs-on: ubuntu-22.04
	  steps:
	    - name: Maximize Docker Build Space
	      uses: gpustack/.github/.github/actions/maximize-docker-build-space@main
	      with:
	        deep-clean: false
	        root-reserve-mb: 20480
	    - name: Checkout
	      uses: actions/checkout@v4
	      with:
	        fetch-depth: 0
	        submodules: 'recursive'
	    - name: Setup Cache
	      timeout-minutes: 5
	      uses: actions/cache@v4
	      with:
	        key: cache-linux-hip-${{ matrix.arch }}-${{ matrix.version }}
	        path: |
	          ${{ github.workspace }}/.cache
	    # Skipped for the current matrix (amd64 only); applies if an arm64 entry is added.
	    - name: Setup QEMU
	      if: ${{ matrix.arch == 'arm64' }}
	      uses: docker/setup-qemu-action@v3
	      with:
	        image: tonistiigi/binfmt:qemu-v8.1.5
	        platforms: "arm64"
	    # The script below is written with an unquoted heredoc: unescaped ${VAR} and
	    # $(cmd) are expanded on the runner while the file is generated, whereas
	    # \${VAR} and \$(cmd) are left for the container to evaluate.
	    - name: Build
	      env:
	        CMAKE_VERSION: "3.22.1"
	        CCACHE_DIR: "${{ github.workspace }}/.cache/ccache"
	        AMDGPU_TARGETS: "${{ matrix.hip_arch }}"
	      run: |
	        echo "===== SCRIPT ====="
	        cat <<EOF > /tmp/entrypoint.sh
	        #!/bin/bash
	        if [ -f /etc/os-release ]; then
	          source /etc/os-release
	          cat /etc/os-release
	          if [ "\${ID}" = "centos" ]; then
	            # NB(thxCode): Patch for CentOS, see
	            # https://github.com/ROCm/ROCm-docker/blob/db86386c24eeb45f5d3ba73564b00cc66566e537/dev/Dockerfile-centos-7.
	            sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo \
	              && sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo \
	              && sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/*.repo
	            yum install -y centos-release-scl
	            sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo \
	              && sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo \
	              && sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/*.repo
	            yum update -y \
	              && yum install -y devtoolset-9 devtoolset-9-libatomic-devel devtoolset-9-elfutils-libelf-devel
	            export PATH="/opt/rh/devtoolset-9/root/usr/bin:\${PATH}" \
	              && export MANPATH="/opt/rh/devtoolset-9/root/usr/share/man:\${MANPATH}" \
	              && export INFOPATH="/opt/rh/devtoolset-9/root/usr/share/info:\${INFOPATH}" \
	              && export PKG_CONFIG_PATH="/opt/rh/devtoolset-9/root/usr/lib64/pkgconfig:\${PKG_CONFIG_PATH}" \
	              && export PCP_DIR="/opt/rh/devtoolset-9/root" \
	              && export PERL5LIB="/opt/rh/devtoolset-9/root/usr/lib64/perl5/vendor_perl:/opt/rh/devtoolset-9/root/usr/lib/perl5:/opt/rh/devtoolset-9/root/usr/share/perl5/" \
	              && export LD_LIBRARY_PATH="/opt/rh/devtoolset-9/root/usr/lib64:/opt/rh/devtoolset-9/root/usr/lib:/opt/rh/devtoolset-9/root/usr/lib64/dyninst:/opt/rh/devtoolset-9/root/usr/lib/dyninst:/opt/rh/devtoolset-9/root/usr/lib64:/opt/rh/devtoolset-9/root/usr/lib:\${LD_LIBRARY_PATH}" \
	              && export LDFLAGS="-Wl,-rpath=/opt/rh/devtoolset-9/root/usr/lib64 -Wl,-rpath=/opt/rh/devtoolset-9/root/usr/lib"
	            yum --enablerepo=extras install -y epel-release
	            yum install -y https://packages.endpointdev.com/rhel/\${VERSION_ID}/os/\$(uname -m)/endpoint-repo.\$(uname -m).rpm
	            yum update -y \
	              && yum install -y make glibc-static libstdc++-static ccache curl git
	          else
	            echo "Unsupport distribution: \${ID}"
	            exit 1
	          fi
	        else
	          echo "Unknown distribution"
	          exit 1
	        fi
	        curl -L "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-\$(uname -m).tar.gz" | tar -zx -C /usr --strip-components 1
	        git config --system --add safe.directory '*'
	        mkdir -p ${{ github.workspace }}/.cache
	        echo "===== BUILD ====="
	        cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \
	          -DGGML_HIP=on -DAMDGPU_TARGETS="${AMDGPU_TARGETS}" \
	          -DCMAKE_HIP_COMPILER="\$(hipconfig -l)/clang" \
	          -DGGML_NATIVE=off \
	          -DGGML_CUDA_DMMV_X=4096 \
	          -DGGML_CUDA_MMV_Y=256 \
	          -DGGML_OPENMP=off \
	          -DGGML_RPC=on
	        cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(nproc)
	        echo "===== RESULT ====="
	        ls -alh ${{ github.workspace }}/build/bin/
	        if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then
	          ldd --version
	          ldd ${{ github.workspace }}/build/bin/llama-box || true
	        else
	          exit 1
	        fi
	        EOF
	        chmod +x /tmp/entrypoint.sh
	        cat /tmp/entrypoint.sh

	        docker run \
	          --rm \
	          --privileged \
	          --platform linux/${{ matrix.arch }} \
	          --volume ${{ github.workspace }}:${{ github.workspace }} \
	          --workdir ${{ github.workspace }} \
	          --env DEBIAN_FRONTEND=noninteractive \
	          --env CCACHE_DIR \
	          --env AMDGPU_TARGETS \
	          --env LLAMA_BOX_BUILD_VERSION \
	          --volume /tmp/entrypoint.sh:/entrypoint.sh \
	          --entrypoint /entrypoint.sh \
	          ${{ matrix.distro_container_image }}

	        echo "===== PACKAGE ====="
	        mkdir -p ${{ github.workspace }}/out
	        zip -j ${{ github.workspace }}/out/llama-box-linux-${{ matrix.arch }}-hip-${{ matrix.version }}.zip ${{ github.workspace }}/build/bin/llama-box
	    - name: Upload Artifact
	      uses: actions/upload-artifact@v4
	      with:
	        path: ${{ github.workspace }}/out/*.zip
	        name: llama-box-linux-${{ matrix.arch }}-hip-${{ matrix.version }}

	linux-cuda:
	  # CUDA build compiled inside NVIDIA's CentOS 7 development images, one
	  # artifact per CUDA version.
	  strategy:
	    fail-fast: false
	    matrix:
	      # see https://hub.docker.com/r/nvidia/cuda/tags?page=&page_size=&ordering=&name=devel.
	      # 12.4 ==> 12.4.0, CentOS 7.
	      # 11.8 ==> 11.8.0, CentOS 7.
	      # build fat binary,
	      # see https://developer.nvidia.com/cuda-gpus.
	      include:
	        - arch: 'amd64'
	          version: '12.4'
	          distro_container_image: 'nvidia/cuda:12.4.0-devel-centos7'
	          cuda_arch: '60-real;61-real;70-real;75-real;80-real;86-real;89-real;90-real'
	        - arch: 'amd64'
	          version: '11.8'
	          distro_container_image: 'nvidia/cuda:11.8.0-devel-centos7'
	          cuda_arch: '60-real;61-real;70-real;75-real;80-real;86-real;89-real;90-real'
	  runs-on: ubuntu-22.04
	  steps:
	    - name: Maximize Docker Build Space
	      uses: gpustack/.github/.github/actions/maximize-docker-build-space@main
	      with:
	        deep-clean: false
	        root-reserve-mb: 20480
	    - name: Checkout
	      uses: actions/checkout@v4
	      with:
	        fetch-depth: 0
	        submodules: 'recursive'
	    - name: Setup Cache
	      timeout-minutes: 5
	      uses: actions/cache@v4
	      with:
	        key: cache-linux-cuda-${{ matrix.arch }}-${{ matrix.version }}
	        path: |
	          ${{ github.workspace }}/.cache
	    # Skipped for the current matrix (amd64 only); applies if an arm64 entry is added.
	    - name: Setup QEMU
	      if: ${{ matrix.arch == 'arm64' }}
	      uses: docker/setup-qemu-action@v3
	      with:
	        image: tonistiigi/binfmt:qemu-v8.1.5
	        platforms: "arm64"
	    # The script below is written with an unquoted heredoc: unescaped ${VAR} and
	    # $(cmd) are expanded on the runner while the file is generated, whereas
	    # \${VAR} and \$(cmd) are left for the container to evaluate.
	    - name: Build
	      env:
	        CMAKE_VERSION: "3.22.1"
	        CCACHE_DIR: "${{ github.workspace }}/.cache/ccache"
	        CUDA_ARCHITECTURES: "${{ matrix.cuda_arch }}"
	      run: |
	        echo "===== SCRIPT ====="
	        cat <<EOF > /tmp/entrypoint.sh
	        #!/bin/bash
	        if [ -f /etc/os-release ]; then
	          source /etc/os-release
	          cat /etc/os-release
	          if [ "\${ID}" = "centos" ]; then
	            # NB(thxCode): Patch for CentOS, see
	            # https://github.com/ROCm/ROCm-docker/blob/db86386c24eeb45f5d3ba73564b00cc66566e537/dev/Dockerfile-centos-7.
	            sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo \
	              && sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo \
	              && sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/*.repo
	            yum install -y centos-release-scl
	            sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo \
	              && sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo \
	              && sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/*.repo
	            yum update -y \
	              && yum install -y devtoolset-9 devtoolset-9-libatomic-devel devtoolset-9-elfutils-libelf-devel
	            export PATH="/opt/rh/devtoolset-9/root/usr/bin:\${PATH}" \
	              && export MANPATH="/opt/rh/devtoolset-9/root/usr/share/man:\${MANPATH}" \
	              && export INFOPATH="/opt/rh/devtoolset-9/root/usr/share/info:\${INFOPATH}" \
	              && export PKG_CONFIG_PATH="/opt/rh/devtoolset-9/root/usr/lib64/pkgconfig:\${PKG_CONFIG_PATH}" \
	              && export PCP_DIR="/opt/rh/devtoolset-9/root" \
	              && export PERL5LIB="/opt/rh/devtoolset-9/root/usr/lib64/perl5/vendor_perl:/opt/rh/devtoolset-9/root/usr/lib/perl5:/opt/rh/devtoolset-9/root/usr/share/perl5/" \
	              && export LD_LIBRARY_PATH="/opt/rh/devtoolset-9/root/usr/lib64:/opt/rh/devtoolset-9/root/usr/lib:/opt/rh/devtoolset-9/root/usr/lib64/dyninst:/opt/rh/devtoolset-9/root/usr/lib/dyninst:/opt/rh/devtoolset-9/root/usr/lib64:/opt/rh/devtoolset-9/root/usr/lib:\${LD_LIBRARY_PATH}" \
	              && export LDFLAGS="-Wl,-rpath=/opt/rh/devtoolset-9/root/usr/lib64 -Wl,-rpath=/opt/rh/devtoolset-9/root/usr/lib"
	            yum --enablerepo=extras install -y epel-release
	            yum install -y https://packages.endpointdev.com/rhel/\${VERSION_ID}/os/\$(uname -m)/endpoint-repo.\$(uname -m).rpm
	            yum update -y \
	              && yum install -y make glibc-static libstdc++-static ccache curl git
	          else
	            echo "Unsupport distribution: \${ID}"
	            exit 1
	          fi
	        else
	          echo "Unknown distribution"
	          exit 1
	        fi
	        curl -L "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-\$(uname -m).tar.gz" | tar -zx -C /usr --strip-components 1
	        git config --system --add safe.directory '*'
	        mkdir -p ${{ github.workspace }}/.cache
	        echo "===== BUILD ====="
	        cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \
	          -DGGML_CUDA=on -DGGML_CUDA_F16=on -DCMAKE_CUDA_ARCHITECTURES="${CUDA_ARCHITECTURES}" \
	          -DGGML_NATIVE=off \
	          -DGGML_CUDA_DMMV_X=4096 \
	          -DGGML_CUDA_MMV_Y=256 \
	          -DGGML_OPENMP=off \
	          -DGGML_RPC=on
	        cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(nproc)
	        echo "===== RESULT ====="
	        ls -alh ${{ github.workspace }}/build/bin/
	        if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then
	          ldd --version
	          ldd ${{ github.workspace }}/build/bin/llama-box || true
	        else
	          exit 1
	        fi
	        EOF
	        chmod +x /tmp/entrypoint.sh
	        cat /tmp/entrypoint.sh

	        docker run \
	          --rm \
	          --privileged \
	          --platform linux/${{ matrix.arch }} \
	          --volume ${{ github.workspace }}:${{ github.workspace }} \
	          --workdir ${{ github.workspace }} \
	          --env DEBIAN_FRONTEND=noninteractive \
	          --env CCACHE_DIR \
	          --env CUDA_ARCHITECTURES \
	          --env LLAMA_BOX_BUILD_VERSION \
	          --volume /tmp/entrypoint.sh:/entrypoint.sh \
	          --entrypoint /entrypoint.sh \
	          ${{ matrix.distro_container_image }}

	        echo "===== PACKAGE ====="
	        mkdir -p ${{ github.workspace }}/out
	        zip -j ${{ github.workspace }}/out/llama-box-linux-${{ matrix.arch }}-cuda-${{ matrix.version }}.zip ${{ github.workspace }}/build/bin/llama-box
	    - name: Upload Artifact
	      uses: actions/upload-artifact@v4
	      with:
	        path: ${{ github.workspace }}/out/*.zip
	        name: llama-box-linux-${{ matrix.arch }}-cuda-${{ matrix.version }}

	linux-oneapi:
	  # SYCL build compiled with the Intel compilers (icx/icpx) inside the oneAPI
	  # basekit image.
	  strategy:
	    fail-fast: false
	    matrix:
	      # see https://hub.docker.com/r/intel/oneapi-basekit/tags?page=&page_size=&ordering=&name=devel.
	      # 2025.0 ==> 2025.0.0-0, Ubuntu 22.04.
	      include:
	        - arch: 'amd64'
	          version: '2025.0'
	          distro_container_image: 'intel/oneapi-basekit:2025.0.0-0-devel-ubuntu22.04'
	  runs-on: ubuntu-22.04
	  steps:
	    - name: Maximize Docker Build Space
	      uses: gpustack/.github/.github/actions/maximize-docker-build-space@main
	      with:
	        deep-clean: false
	        root-reserve-mb: 20480
	    - name: Checkout
	      uses: actions/checkout@v4
	      with:
	        fetch-depth: 0
	        submodules: 'recursive'
	    - name: Setup Cache
	      timeout-minutes: 5
	      uses: actions/cache@v4
	      with:
	        key: cache-linux-oneapi-${{ matrix.arch }}-${{ matrix.version }}
	        path: |
	          ${{ github.workspace }}/.cache
	    # Skipped for the current matrix (amd64 only); applies if an arm64 entry is added.
	    - name: Setup QEMU
	      if: ${{ matrix.arch == 'arm64' }}
	      uses: docker/setup-qemu-action@v3
	      with:
	        image: tonistiigi/binfmt:qemu-v8.1.5
	        platforms: "arm64"
	    # The script below is written with an unquoted heredoc: unescaped ${VAR} and
	    # $(cmd) are expanded on the runner while the file is generated, whereas
	    # \${VAR} and \$(cmd) are left for the container to evaluate.
	    - name: Build
	      env:
	        CMAKE_VERSION: "3.22.1"
	        CCACHE_DIR: "${{ github.workspace }}/.cache/ccache"
	      run: |
	        echo "===== SCRIPT ====="
	        cat <<EOF > /tmp/entrypoint.sh
	        #!/bin/bash
	        if [ -f /etc/os-release ]; then
	          source /etc/os-release
	          cat /etc/os-release
	          if [ "\${ID}" = "ubuntu" ]; then
	            apt-get update -y \
	              && apt-get install -y build-essential ccache curl git bc
	            if (( \$(echo "\${VERSION_ID} < 21.04" | bc -l) )); then
	              apt-get update -y \
	                && apt-get install -y software-properties-common
	              add-apt-repository -y ppa:ubuntu-toolchain-r/test
	              apt-get update -y \
	                && apt-get install -y gcc-11 g++-11
	              update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 10
	              update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 10
	            fi
	          else
	            echo "Unsupport distribution: \${ID}"
	            exit 1
	          fi
	        else
	          echo "Unknown distribution"
	          exit 1
	        fi
	        curl -L "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-\$(uname -m).tar.gz" | tar -zx -C /usr --strip-components 1
	        git config --system --add safe.directory '*'
	        mkdir -p ${{ github.workspace }}/.cache
	        echo "===== BUILD ====="
	        cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \
	          -DGGML_SYCL=on -DGGML_SYCL_F16=on \
	          -DGGML_NATIVE=off \
	          -DGGML_OPENMP=off \
	          -DGGML_RPC=on
	        cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(nproc)
	        echo "===== RESULT ====="
	        ls -alh ${{ github.workspace }}/build/bin/
	        if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then
	          ldd --version
	          ldd ${{ github.workspace }}/build/bin/llama-box || true
	        else
	          exit 1
	        fi
	        EOF
	        chmod +x /tmp/entrypoint.sh
	        cat /tmp/entrypoint.sh

	        docker run \
	          --rm \
	          --privileged \
	          --platform linux/${{ matrix.arch }} \
	          --volume ${{ github.workspace }}:${{ github.workspace }} \
	          --workdir ${{ github.workspace }} \
	          --env CC=icx \
	          --env CXX=icpx \
	          --env DEBIAN_FRONTEND=noninteractive \
	          --env CCACHE_DIR \
	          --env LLAMA_BOX_BUILD_VERSION \
	          --volume /tmp/entrypoint.sh:/entrypoint.sh \
	          --entrypoint /entrypoint.sh \
	          ${{ matrix.distro_container_image }}

	        echo "===== PACKAGE ====="
	        mkdir -p ${{ github.workspace }}/out
	        zip -j ${{ github.workspace }}/out/llama-box-linux-${{ matrix.arch }}-oneapi-${{ matrix.version }}.zip ${{ github.workspace }}/build/bin/llama-box
	    - name: Upload Artifact
	      uses: actions/upload-artifact@v4
	      with:
	        path: ${{ github.workspace }}/out/*.zip
	        name: llama-box-linux-${{ matrix.arch }}-oneapi-${{ matrix.version }}

	linux-cann:
	  # Ascend CANN build. The matrix is the full cross product of arch, version
	  # and container image; SOC type and artifact suffixes are derived from the
	  # image name (910b vs 310p, Ubuntu vs openEuler).
	  strategy:
	    fail-fast: false
	    matrix:
	      # see https://hub.docker.com/r/ascendai/cann/tags?page=&page_size=&ordering=&name=8.0.rc2.alpha003-910b.
	      # 8.0 ==> 8.0.rc2.alpha003, Ubuntu 20.04, OpenEuler 20.03.
	      arch:
	        - 'amd64'
	        - 'arm64'
	      version:
	        - '8.0'
	      distro_container_image:
	        - 'ascendai/cann:8.0.rc2.alpha003-910b-openeuler20.03-py3.9'
	        - 'ascendai/cann:8.0.rc2.alpha003-910b-ubuntu20.04-py3.9'
	        - 'gpustack/ascendai-cann:8.0.RC2.alpha003-310p-openeuler20.03-py3.9'
	        - 'gpustack/ascendai-cann:8.0.RC2.alpha003-310p-ubuntu20.04-py3.9'
	  runs-on: ubuntu-22.04
	  steps:
	    - name: Maximize Docker Build Space
	      uses: gpustack/.github/.github/actions/maximize-docker-build-space@main
	      with:
	        deep-clean: false
	        root-reserve-mb: 20480
	    - name: Checkout
	      uses: actions/checkout@v4
	      with:
	        fetch-depth: 0
	        submodules: 'recursive'
	    - name: Setup Cache
	      timeout-minutes: 5
	      uses: actions/cache@v4
	      with:
	        key: cache-linux-cann-${{ matrix.arch }}-${{ matrix.version }}-${{ matrix.distro_container_image }}${{ contains(matrix.distro_container_image, '310p') && '-310p' || '' }}
	        path: |
	          ${{ github.workspace }}/.cache
	    # arm64 combinations are emulated on the amd64 runner.
	    - name: Setup QEMU
	      if: ${{ matrix.arch == 'arm64' }}
	      uses: docker/setup-qemu-action@v3
	      with:
	        image: tonistiigi/binfmt:qemu-v8.1.5
	        platforms: "arm64"
	    # The script below is written with an unquoted heredoc: unescaped ${VAR} and
	    # $(cmd) are expanded on the runner while the file is generated, whereas
	    # \${VAR} and \$(cmd) are left for the container to evaluate.
	    - name: Build
	      env:
	        CMAKE_VERSION: "3.22.1"
	        CCACHE_DIR: "${{ github.workspace }}/.cache/ccache"
	      run: |
	        echo "===== SCRIPT ====="
	        cat <<EOF > /tmp/entrypoint.sh
	        #!/bin/bash
	        if [ -f /etc/os-release ]; then
	          source /etc/os-release
	          cat /etc/os-release
	          if [ "\${ID}" = "ubuntu" ]; then
	            apt-get update -y \
	              && apt-get install -y build-essential ccache curl git bc
	            if (( \$(echo "\${VERSION_ID} < 21.04" | bc -l) )); then
	              apt-get update -y \
	                && apt-get install -y software-properties-common
	              add-apt-repository -y ppa:ubuntu-toolchain-r/test
	              apt-get update -y \
	                && apt-get install -y gcc-11 g++-11
	              update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 10
	              update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 10
	            fi
	          elif [ "\${ID}" = "openEuler" ]; then
	            yum update -y \
	              && yum install -y gcc gcc-c++ gcc-toolset-10-gcc*
	            yum update -y \
	              && yum install -y gcc-toolset-10-libstdc++-static make ccache curl git
	            export PATH="/opt/openEuler/gcc-toolset-10/root/usr/bin/:\${PATH}" && \
	              export LD_LIBRARY_PATH="/opt/openEuler/gcc-toolset-10/root/usr/lib64/:\${LD_LIBRARY_PATH}"
	          else
	            echo "Unsupport distribution: \${ID}"
	            exit 1
	          fi
	        else
	          echo "Unknown distribution"
	          exit 1
	        fi
	        source /usr/local/Ascend/ascend-toolkit/set_env.sh
	        curl -L "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-\$(uname -m).tar.gz" | tar -zx -C /usr --strip-components 1
	        git config --system --add safe.directory '*'
	        mkdir -p ${{ github.workspace }}/.cache
	        echo "===== BUILD ====="
	        cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \
	          -DGGML_CANN=on \
	          -DSOC_TYPE=${{ contains(matrix.distro_container_image, '910b') && 'Ascend910B3' || 'Ascend310P3' }} \
	          -DGGML_NATIVE=off \
	          ${{ matrix.arch == 'arm64' && '-DGGML_CPU_ARM_ARCH="armv8.2-a"' || '' }} \
	          ${{ contains(matrix.distro_container_image, '310p') && '-DGGML_AVX2=off' || '' }} \
	          -DGGML_OPENMP=off \
	          -DGGML_RPC=on
	        cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(nproc)
	        echo "===== RESULT ====="
	        ls -alh ${{ github.workspace }}/build/bin/
	        if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then
	          ldd --version
	          ldd ${{ github.workspace }}/build/bin/llama-box || true
	        else
	          exit 1
	        fi
	        EOF
	        chmod +x /tmp/entrypoint.sh
	        cat /tmp/entrypoint.sh

	        docker run \
	          --rm \
	          --privileged \
	          --platform linux/${{ matrix.arch }} \
	          --volume ${{ github.workspace }}:${{ github.workspace }} \
	          --workdir ${{ github.workspace }} \
	          --env DEBIAN_FRONTEND=noninteractive \
	          --env CCACHE_DIR \
	          --env LLAMA_BOX_BUILD_VERSION \
	          --volume /tmp/entrypoint.sh:/entrypoint.sh \
	          --entrypoint /entrypoint.sh \
	          ${{ matrix.distro_container_image }}

	        echo "===== PACKAGE ====="
	        mkdir -p ${{ github.workspace }}/out
	        zip -j ${{ github.workspace }}/out/llama-box-linux-${{ matrix.arch }}-cann-${{ matrix.version }}${{ contains(matrix.distro_container_image, 'openeuler20.03') && '-openeuler20.03' || '' }}${{ contains(matrix.distro_container_image, '310p') && '-310p' || '' }}.zip ${{ github.workspace }}/build/bin/llama-box
	    - name: Upload Artifact
	      uses: actions/upload-artifact@v4
	      with:
	        path: ${{ github.workspace }}/out/*.zip
	        name: llama-box-linux-${{ matrix.arch }}-cann-${{ matrix.version }}${{ contains(matrix.distro_container_image, 'openeuler20.03') && '-openeuler20.03' || '' }}${{ contains(matrix.distro_container_image, '310p') && '-310p' || '' }}

	linux-musa:
	strategy:
	fail-fast: false
	matrix:
	# see https://hub.docker.com/r/mthreads/musa/tags?page_size=&ordering=&name=ubuntu20.04.
	# rc3.1.0 ==> rc3.1.0, Ubuntu 20.04.
	include:
	- arch: 'amd64'
	version: 'rc3.1'
	distro_container_image: 'mthreads/musa:rc3.1.0-devel-ubuntu22.04'
	runs-on: ubuntu-22.04
	steps:
	- name: Maximize Docker Build Space
	uses: gpustack/.github/.github/actions/maximize-docker-build-space@main
	with:
	deep-clean: false
	root-reserve-mb: 20480
	- name: Checkout
	uses: actions/checkout@v4
	with:
	fetch-depth: 0
	submodules: 'recursive'
	- name: Setup Cache
	timeout-minutes: 5
	uses: actions/cache@v4
	with:
	key: cache-linux-musa-${{ matrix.arch }}-${{ matrix.version }}
	path: \|
	${{ github.workspace }}/.cache
	- name: Setup QEMU
	if: ${{ matrix.arch == 'arm64' }}
	uses: docker/setup-qemu-action@v3
	with:
	image: tonistiigi/binfmt:qemu-v8.1.5
	platforms: "arm64"
	# Write an entrypoint script, run it inside the matrix's distro container to
	# build the llama-box target with MUSA enabled, then zip the resulting binary.
	- name: Build
	  env:
	    CCACHE_DIR: "${{ github.workspace }}/.cache/ccache"
	  run: |
	    echo "===== SCRIPT ====="
	    # The heredoc delimiter is unquoted: expressions escaped as \$ are evaluated
	    # later inside the container, while unescaped ones such as $(nproc) are
	    # expanded by the runner shell at the moment the script file is written.
	    cat <<EOF > /tmp/entrypoint.sh
	    #!/bin/bash
	    if [ -f /etc/os-release ]; then
	      source /etc/os-release
	      cat /etc/os-release
	      if [ "\${ID}" = "ubuntu" ]; then
	        apt-get update -y \
	          && apt-get install -y build-essential ccache curl git cmake
	        if (( \$(echo "\${VERSION_ID} < 21.04" | bc -l) )); then
	          apt-get update -y \
	            && apt-get install -y software-properties-common
	          add-apt-repository -y ppa:ubuntu-toolchain-r/test
	          apt-get update -y \
	            && apt-get install -y gcc-11 g++-11
	          update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 10
	          update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 10
	        fi
	      else
	        echo "Unsupport distribution: \$ID"
	        exit 1
	      fi
	    else
	      echo "Unknown distribution"
	      exit 1
	    fi
	    git config --system --add safe.directory '*'
	    mkdir -p ${{ github.workspace }}/.cache
	    echo "===== BUILD ====="
	    cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \
	      -DGGML_MUSA=on \
	      -DGGML_NATIVE=off \
	      -DGGML_OPENMP=off \
	      -DGGML_RPC=on
	    cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(nproc)
	    echo "===== RESULT ====="
	    ls -alh ${{ github.workspace }}/build/bin/
	    if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then
	      ldd --version
	      ldd ${{ github.workspace }}/build/bin/llama-box || true
	    else
	      exit 1
	    fi
	    EOF
	    chmod +x /tmp/entrypoint.sh
	    cat /tmp/entrypoint.sh

	    # The workspace is mounted at the same path inside the container, so the
	    # build output lands in the runner's workspace; CCACHE_DIR and
	    # LLAMA_BOX_BUILD_VERSION are forwarded from the step environment.
	    docker run \
	      --rm \
	      --privileged \
	      --platform linux/${{ matrix.arch }} \
	      --volume ${{ github.workspace }}:${{ github.workspace }} \
	      --workdir ${{ github.workspace }} \
	      --env DEBIAN_FRONTEND=noninteractive \
	      --env CCACHE_DIR \
	      --env LLAMA_BOX_BUILD_VERSION \
	      --volume /tmp/entrypoint.sh:/entrypoint.sh \
	      --entrypoint /entrypoint.sh \
	      ${{ matrix.distro_container_image }}

	    echo "===== PACKAGE ====="
	    mkdir -p ${{ github.workspace }}/out
	    zip -j ${{ github.workspace }}/out/llama-box-linux-${{ matrix.arch }}-musa-${{ matrix.version }}.zip ${{ github.workspace }}/build/bin/llama-box
	# Upload the zip produced by the Build step; the artifact name mirrors the zip's base name.
	- name: Upload Artifact
	  uses: actions/upload-artifact@v4
	  with:
	    name: llama-box-linux-${{ matrix.arch }}-musa-${{ matrix.version }}
	    path: ${{ github.workspace }}/out/*.zip

	# CPU-only Windows builds of llama-box: one matrix entry per arch/instruction-set pair.
	windows:
	  strategy:
	    fail-fast: false
	    matrix:
	      # AVX2 ==> Windows Server 2022.
	      # AVX512 ==> Windows Server 2022.
	      # NEON ==> Windows Server 2022.
	      include:
	        - arch: 'amd64'
	          instruction: 'avx2'
	        - arch: 'amd64'
	          instruction: 'avx512'
	        - arch: 'arm64'
	          instruction: 'neon'
	  runs-on: windows-2022
	  steps:
	    - name: Checkout
	      uses: actions/checkout@v4
	      with:
	        fetch-depth: 0
	        submodules: 'recursive'
	    - name: Setup Cache
	      # doesn't support ccache,
	      # see https://stackoverflow.com/questions/72829476/how-to-use-ccache-4-6-1-on-windows-msvc-with-cmake.
	      timeout-minutes: 5
	      uses: actions/cache@v4
	      with:
	        key: cache-windows-${{ matrix.arch }}-${{ matrix.instruction }}
	        # The build tree itself is cached alongside .cache.
	        path: |
	          ${{ github.workspace }}\build
	          ${{ github.workspace }}\.cache
	    - name: Deps
	      run: |
	        $ErrorActionPreference = "Stop"
	        $ProgressPreference = 'SilentlyContinue'

	        choco install ccache ninja curl -y
	    - name: Build
	      env:
	        CCACHE_DIR: "${{ github.workspace }}\\.cache\\ccache"
	      # arm64 configures with "Ninja Multi-Config" plus a toolchain file and
	      # builds with `-j N`; the other entries use CMake's default generator
	      # and pass `/m:N` to the native build tool instead.
	      run: |
	        $ErrorActionPreference = "Stop"
	        $ProgressPreference = 'SilentlyContinue'

	        Write-Host "===== BUILD ====="
	        New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\.cache" -ErrorAction Ignore | Out-Null
	        cmake ${{ matrix.arch == 'arm64' && '-G "Ninja Multi-Config"' || '' }} -S ${{ github.workspace }} -B ${{ github.workspace }}\build -DCMAKE_BUILD_TYPE=Release `
	            ${{ matrix.arch == 'arm64' && format('-DCMAKE_TOOLCHAIN_FILE={0}\llama-box\scripts\build-windows-arm64.cmake', github.workspace) || '' }} `
	            -DGGML_NATIVE=off `
	            ${{ matrix.instruction == 'avx2' && '-DGGML_AVX=on -DGGML_AVX2=on' || '' }} `
	            ${{ matrix.instruction == 'avx512' && '-DGGML_AVX512=on -DGGML_AVX512_VBMI=on -DGGML_AVX512_VNNI=on -DGGML_AVX512_BF16=on' || '' }} `
	            -DGGML_STATIC=on `
	            -DGGML_OPENMP=off `
	            -DGGML_RPC=on
	        cmake --build ${{ github.workspace }}\build --target llama-box --config Release -- ${{ matrix.arch == 'arm64' && '-j ' || '/m:' }}${env:NUMBER_OF_PROCESSORS}
	        Write-Host "===== RESULT ====="
	        Get-ChildItem -Path "${{ github.workspace }}\build\bin\Release\" -File -ErrorAction Ignore
	        if (Test-Path -Path "${{ github.workspace }}\build\bin\Release\llama-box.exe") {
	            llvm-objdump.exe -p "${{ github.workspace }}\build\bin\Release\llama-box.exe"
	        } else {
	            exit 1
	        }

	        Write-Host "===== PACKAGE ====="
	        New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\out" -ErrorAction Ignore | Out-Null
	        Compress-Archive -Path "${{ github.workspace }}\build\bin\Release\llama-box.exe" -DestinationPath "${{ github.workspace }}\out\llama-box-windows-${{ matrix.arch }}-${{ matrix.instruction }}.zip"
	    - name: Upload Artifact
	      uses: actions/upload-artifact@v4
	      with:
	        path: ${{ github.workspace }}\\out\\*.zip
	        name: llama-box-windows-${{ matrix.arch }}-${{ matrix.instruction }}

	# Windows build of llama-box against the AMD ROCm HIP SDK.
	windows-hip:
	  strategy:
	    fail-fast: false
	    matrix:
	      # see https://www.amd.com/en/developer/resources/rocm-hub/hip-sdk.html.
	      # 6.1 ==> 6.1.2, Windows Server 2022.
	      # build fat binary,
	      # see https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878,
	      #     https://llvm.org/docs/AMDGPUUsage.html.
	      # official gpu support list,
	      # see https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.1.2/reference/system-requirements.html.
	      include:
	        - arch: 'amd64'
	          version: '6.1'
	          distro_binary_installer: 'https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe'
	          hip_arch: 'gfx1030;gfx1100;gfx1101;gfx1102'
	  runs-on: windows-2022
	  steps:
	    - name: Checkout
	      uses: actions/checkout@v4
	      with:
	        fetch-depth: 0
	        submodules: 'recursive'
	    - name: Setup Cache
	      timeout-minutes: 5
	      uses: actions/cache@v4
	      with:
	        key: cache-windows-hip-${{ matrix.arch }}-${{ matrix.version }}
	        path: |
	          ${{ github.workspace }}\.cache
	    - name: Deps
	      run: |
	        $ErrorActionPreference = "Stop"
	        $ProgressPreference = 'SilentlyContinue'

	        choco install ccache curl -y
	    # Download and install the SDK, then export its root as HIP_PATH for later steps.
	    - name: Setup HIP
	      run: |
	        $ErrorActionPreference = "Stop"
	        $ProgressPreference = 'SilentlyContinue'

	        # Timestamps use "MM" (month); lowercase "mm" is the minutes specifier.
	        Write-Host "I [$((Get-Date).ToString("yyyy-MM-dd HH:mm:ss"))] download AMD ROCm HIP SDK"
	        curl.exe --retry 5 --retry-delay 5 `
	            --output "${{ runner.temp }}\installer.exe" `
	            --url "${{ matrix.distro_binary_installer }}"

	        Write-Host "I [$((Get-Date).ToString("yyyy-MM-dd HH:mm:ss"))] install AMD ROCm HIP SDK"
	        Start-Process "${{ runner.temp }}\installer.exe" -NoNewWindow -Wait `
	            -ArgumentList '-install'

	        Write-Host "I [$((Get-Date).ToString("yyyy-MM-dd HH:mm:ss"))] verify AMD ROCm HIP SDK"
	        & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version

	        # Two Split-Path hops: ...\<version>\bin\clang.exe -> ...\<version>.
	        $hipPath = "$(Resolve-Path -Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | Split-Path | Split-Path)"
	        "HIP_PATH=${hipPath}" | Out-File -FilePath $env:GITHUB_ENV -Append
	    # Configure with the SDK's clang/clang++ and the matrix's GPU targets, build, verify and zip.
	    - name: Build
	      env:
	        CCACHE_DIR: "${{ github.workspace }}\\.cache\\ccache"
	        AMDGPU_TARGETS: "${{ matrix.hip_arch }}"
	      run: |
	        $ErrorActionPreference = "Stop"
	        $ProgressPreference = 'SilentlyContinue'

	        Write-Host "HIP_PATH=${env:HIP_PATH}"

	        Write-Host "===== BUILD ====="
	        New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\.cache" -ErrorAction Ignore | Out-Null
	        $env:CMAKE_PREFIX_PATH = "${env:HIP_PATH}"
	        cmake -G "Unix Makefiles" -S ${{ github.workspace }} -B ${{ github.workspace }}\build -DCMAKE_BUILD_TYPE=Release `
	            -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" `
	            -DGGML_HIP=on -DAMDGPU_TARGETS="${env:AMDGPU_TARGETS}" `
	            -DGGML_NATIVE=off `
	            -DGGML_CUDA_DMMV_X=4096 `
	            -DGGML_CUDA_MMV_Y=256 `
	            -DGGML_OPENMP=off `
	            -DGGML_RPC=on
	        cmake --build ${{ github.workspace }}\build --target llama-box --config Release -- -j ${env:NUMBER_OF_PROCESSORS}
	        Write-Host "===== RESULT ====="
	        Get-ChildItem -Path "${{ github.workspace }}\build\bin\" -File -ErrorAction Ignore
	        if (Test-Path -Path "${{ github.workspace }}\build\bin\llama-box.exe") {
	            llvm-objdump.exe -p "${{ github.workspace }}\build\bin\llama-box.exe"
	        } else {
	            exit 1
	        }

	        Write-Host "===== PACKAGE ====="
	        New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\out" -ErrorAction Ignore | Out-Null
	        Compress-Archive -Path "${{ github.workspace }}\build\bin\llama-box.exe" -DestinationPath "${{ github.workspace }}\out\llama-box-windows-${{ matrix.arch }}-hip-${{ matrix.version }}.zip"
	    - name: Upload Artifact
	      uses: actions/upload-artifact@v4
	      with:
	        path: ${{ github.workspace }}\\out\\*.zip
	        name: llama-box-windows-${{ matrix.arch }}-hip-${{ matrix.version }}

	# Windows builds of llama-box against the NVIDIA CUDA toolkit, one per CUDA version.
	windows-cuda:
	  strategy:
	    fail-fast: false
	    matrix:
	      # see https://developer.nvidia.com/cuda-downloads?target_os=Windows&target_arch=x86_64&target_version=Server2022&target_type=exe_network.
	      # 12.4 ==> 12.4.1, Windows Server 2022.
	      # 11.8 ==> 11.8.0, Windows Server 2019.
	      # build fat binary,
	      # see https://developer.nvidia.com/cuda-gpus.
	      include:
	        - arch: 'amd64'
	          version: '12.4'
	          distro_binary_installer: 'https://developer.download.nvidia.com/compute/cuda/12.4.1/network_installers/cuda_12.4.1_windows_network.exe'
	          cuda_arch: '60-real;61-real;70-real;75-real;80-real;86-real;89-real;90-real'
	        - arch: 'amd64'
	          version: '11.8'
	          distro_binary_installer: 'https://developer.download.nvidia.com/compute/cuda/11.8.0/network_installers/cuda_11.8.0_windows_network.exe'
	          cuda_arch: '60-real;61-real;70-real;75-real;80-real;86-real;89-real;90-real'
	  runs-on: ${{ matrix.version == '11.8' && 'windows-2019' || 'windows-2022' }}
	  steps:
	    - name: Checkout
	      uses: actions/checkout@v4
	      with:
	        fetch-depth: 0
	        submodules: 'recursive'
	    - name: Setup Cache
	      timeout-minutes: 5
	      uses: actions/cache@v4
	      with:
	        key: cache-windows-cuda-${{ matrix.arch }}-${{ matrix.version }}
	        path: |
	          ${{ github.workspace }}\.cache
	    - name: Deps
	      run: |
	        $ErrorActionPreference = "Stop"
	        $ProgressPreference = 'SilentlyContinue'

	        choco install ccache ninja curl -y
	    # Install the selected CUDA components, export CUDA_PATH* variables, and
	    # capture the Visual Studio environment from vcvarsall.bat for later steps.
	    - name: Setup CUDA
	      run: |
	        $ErrorActionPreference = "Stop"
	        $ProgressPreference = 'SilentlyContinue'

	        # Timestamps use "MM" (month); lowercase "mm" is the minutes specifier.
	        Write-Host "I [$((Get-Date).ToString("yyyy-MM-dd HH:mm:ss"))] download NVIDIA CUDA SDK"
	        curl.exe --retry 5 --retry-delay 5 `
	            --output "${{ runner.temp }}\installer.exe" `
	            --url "${{ matrix.distro_binary_installer }}"

	        Write-Host "I [$((Get-Date).ToString("yyyy-MM-dd HH:mm:ss"))] install NVIDIA CUDA SDK"
	        Start-Process "${{ runner.temp }}\installer.exe" -NoNewWindow -Wait `
	            -ArgumentList '-s','nvcc_${{ matrix.version }}','cudart_${{ matrix.version }}','cublas_${{ matrix.version }}','cublas_dev_${{ matrix.version }}','thrust_${{ matrix.version }}','visual_studio_integration_${{ matrix.version }}'

	        Write-Host "I [$((Get-Date).ToString("yyyy-MM-dd HH:mm:ss"))] verify NVIDIA CUDA SDK"
	        & 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v*\bin\nvcc.exe' --version

	        # Two Split-Path hops: ...\vX.Y\bin\nvcc.exe -> ...\vX.Y.
	        $cudaPath = "$(Resolve-Path -Path 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v*\bin\nvcc.exe' | Split-Path | Split-Path)"
	        # vX.Y -> X_Y; the dot is escaped so only a literal '.' separator matches.
	        $cudaVersion=($cudaPath | Split-Path -Leaf ) -replace 'v(\d+)\.(\d+)', '$1_$2'
	        "CUDA_PATH=${cudaPath}" | Out-File -FilePath $env:GITHUB_ENV -Append
	        "CUDA_PATH_V${cudaVersion}=$cudaPath" | Out-File -FilePath $env:GITHUB_ENV -Append
	        "CUDA_PATH_VX_Y=CUDA_PATH_V${cudaVersion}" | Out-File -FilePath $env:GITHUB_ENV -Append

	        # Dump the environment after vcvarsall.bat and forward every NAME=VALUE
	        # line to GITHUB_ENV; the pattern must accept names and values of any length.
	        if (Test-Path -Path "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat") {
	            cmd /c 'call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" ${{ matrix.arch == 'amd64' && 'amd64' || 'amd64_arm64' }} && set' | ForEach-Object { `
	                if ($_ -Match '^(.*?)=(.*)$') { $_ | Out-File -FilePath $env:GITHUB_ENV -Append } `
	            }
	        } else {
	            cmd /c 'call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" ${{ matrix.arch == 'amd64' && 'amd64' || 'amd64_arm64' }} && set' | ForEach-Object { `
	                if ($_ -Match '^(.*?)=(.*)$') { $_ | Out-File -FilePath $env:GITHUB_ENV -Append } `
	            }
	        }
	    # Configure with Ninja, build ggml first (one fewer job than processors),
	    # then llama-box; verify the binary exists and zip it.
	    - name: Build
	      env:
	        CCACHE_DIR: "${{ github.workspace }}\\.cache\\ccache"
	        CUDA_ARCHITECTURES: "${{ matrix.cuda_arch }}"
	      run: |
	        $ErrorActionPreference = "Stop"
	        $ProgressPreference = 'SilentlyContinue'

	        Write-Host "CUDA_PATH=${env:CUDA_PATH}"

	        Write-Host "===== BUILD ====="
	        New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\.cache" -ErrorAction Ignore | Out-Null
	        cmake -G "Ninja" -S ${{ github.workspace }} -B ${{ github.workspace }}\build -DCMAKE_BUILD_TYPE=Release `
	            -DGGML_CUDA=on -DGGML_CUDA_F16=on -DCMAKE_CUDA_ARCHITECTURES="${env:CUDA_ARCHITECTURES}" `
	            -DGGML_NATIVE=off `
	            -DGGML_CUDA_DMMV_X=4096 `
	            -DGGML_CUDA_MMV_Y=256 `
	            -DGGML_OPENMP=off `
	            -DGGML_RPC=on
	        cmake --build ${{ github.workspace }}\build --target ggml --config Release -- -j $((${env:NUMBER_OF_PROCESSORS} - 1))
	        cmake --build ${{ github.workspace }}\build --target llama-box --config Release -- -j ${env:NUMBER_OF_PROCESSORS}
	        Write-Host "===== RESULT ====="
	        Get-ChildItem -Path "${{ github.workspace }}\build\bin\" -File -ErrorAction Ignore
	        if (Test-Path -Path "${{ github.workspace }}\build\bin\llama-box.exe") {
	            llvm-objdump.exe -p "${{ github.workspace }}\build\bin\llama-box.exe"
	        } else {
	            exit 1
	        }

	        Write-Host "===== PACKAGE ====="
	        New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\out" -ErrorAction Ignore | Out-Null
	        Compress-Archive -Path "${{ github.workspace }}\build\bin\llama-box.exe" -DestinationPath "${{ github.workspace }}\out\llama-box-windows-${{ matrix.arch }}-cuda-${{ matrix.version }}.zip"
	    - name: Upload Artifact
	      uses: actions/upload-artifact@v4
	      with:
	        path: ${{ github.workspace }}\\out\\*.zip
	        name: llama-box-windows-${{ matrix.arch }}-cuda-${{ matrix.version }}

	# Windows build of llama-box with SYCL enabled, using the Intel oneAPI toolkit.
	windows-oneapi:
	  strategy:
	    fail-fast: false
	    matrix:
	      # see https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit-download.html?operatingsystem=windows&windows-install-type=online.
	      # 2025.0 ==> 2025.0.0, Windows Server 2022.
	      include:
	        - arch: 'amd64'
	          version: '2025.0'
	          distro_binary_installer: 'https://registrationcenter-download.intel.com/akdlm/IRC_NAS/b380d914-366b-4b77-a74a-05e3c38b3514/intel-oneapi-base-toolkit-2025.0.0.882.exe'
	  runs-on: windows-2022
	  steps:
	    - name: Checkout
	      uses: actions/checkout@v4
	      with:
	        fetch-depth: 0
	        submodules: 'recursive'
	    - name: Setup Cache
	      # doesn't support ccache,
	      # as the oneAPI need to configure the environment variables via setvars.bat.
	      timeout-minutes: 5
	      uses: actions/cache@v4
	      with:
	        key: cache-windows-oneapi-${{ matrix.arch }}-${{ matrix.version }}
	        path: |
	          ${{ github.workspace }}\build
	    - name: Deps
	      run: |
	        $ErrorActionPreference = "Stop"
	        $ProgressPreference = 'SilentlyContinue'

	        choco install ninja curl -y
	    # Install the selected oneAPI components, export ONEAPI_PATH / ONEAPI_ROOT,
	    # and capture the environment produced by setvars.bat for later steps.
	    - name: Setup oneAPI
	      run: |
	        $ErrorActionPreference = "Stop"
	        $ProgressPreference = 'SilentlyContinue'

	        # Timestamps use "MM" (month); lowercase "mm" is the minutes specifier.
	        Write-Host "I [$((Get-Date).ToString("yyyy-MM-dd HH:mm:ss"))] download Intel oneAPI SDK"
	        curl.exe --retry 5 --retry-delay 5 `
	            --output "${{ runner.temp }}\installer.exe" `
	            --url "${{ matrix.distro_binary_installer }}"

	        Write-Host "I [$((Get-Date).ToString("yyyy-MM-dd HH:mm:ss"))] install Intel oneAPI SDK"
	        Start-Process "${{ runner.temp }}\installer.exe" -NoNewWindow -Wait `
	            -ArgumentList '-s','--action=install','--components=intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel:intel.oneapi.win.dnnl:intel.oneapi.win.tbb.devel','--eula=accept','-p=NEED_VS2017_INTEGRATION=0','-p=NEED_VS2019_INTEGRATION=0','-p=NEED_VS2022_INTEGRATION=0'

	        Write-Host "I [$((Get-Date).ToString("yyyy-MM-dd HH:mm:ss"))] verify Intel oneAPI SDK"
	        & 'C:\Program Files (x86)\Intel\oneAPI\*\bin\icx.exe' --version

	        # ONEAPI_PATH is two levels above icx.exe; ONEAPI_ROOT is its parent directory.
	        $oneapiPath = "$(Resolve-Path -Path 'C:\Program Files (x86)\Intel\oneAPI\*\bin\icx.exe' | Split-Path | Split-Path)"
	        "ONEAPI_PATH=${oneapiPath}" | Out-File -FilePath $env:GITHUB_ENV -Append
	        $oneapiRoot = "$(Split-Path -Path $oneapiPath)"
	        "ONEAPI_ROOT=${oneapiRoot}" | Out-File -FilePath $env:GITHUB_ENV -Append

	        # Dump the environment after setvars.bat and forward every NAME=VALUE
	        # line to GITHUB_ENV; the pattern must accept names and values of any length.
	        cmd /c "call `"${oneapiRoot}\setvars.bat`" && set" | ForEach-Object { `
	            if ($_ -Match '^(.*?)=(.*)$') { $_ | Out-File -FilePath $env:GITHUB_ENV -Append } `
	        }
	    # Configure with Ninja (cl for C, icx for C++), build ggml first (one fewer
	    # job than processors), then llama-box; verify the binary exists and zip it.
	    - name: Build
	      run: |
	        $ErrorActionPreference = "Stop"
	        $ProgressPreference = 'SilentlyContinue'

	        Write-Host "ONEAPI_PATH=${env:ONEAPI_PATH}"
	        Write-Host "ONEAPI_ROOT=${env:ONEAPI_ROOT}"

	        Write-Host "===== BUILD ====="
	        cmake -G "Ninja" -S ${{ github.workspace }} -B ${{ github.workspace }}\build -DCMAKE_BUILD_TYPE=Release `
	            -DCMAKE_C_COMPILER=cl -DCMAKE_CXX_COMPILER=icx `
	            -DGGML_SYCL=on -DGGML_SYCL_F16=on `
	            -DGGML_NATIVE=off `
	            -DGGML_OPENMP=off `
	            -DGGML_RPC=on
	        cmake --build ${{ github.workspace }}\build --target ggml --config Release -- -j $((${env:NUMBER_OF_PROCESSORS} - 1))
	        cmake --build ${{ github.workspace }}\build --target llama-box --config Release -- -j ${env:NUMBER_OF_PROCESSORS}
	        Write-Host "===== RESULT ====="
	        Get-ChildItem -Path "${{ github.workspace }}\build\bin\" -File -ErrorAction Ignore
	        if (Test-Path -Path "${{ github.workspace }}\build\bin\llama-box.exe") {
	            llvm-objdump.exe -p "${{ github.workspace }}\build\bin\llama-box.exe"
	        } else {
	            exit 1
	        }

	        Write-Host "===== PACKAGE ====="
	        New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\out" -ErrorAction Ignore | Out-Null
	        Compress-Archive -Path "${{ github.workspace }}\build\bin\llama-box.exe" -DestinationPath "${{ github.workspace }}\out\llama-box-windows-${{ matrix.arch }}-oneapi-${{ matrix.version }}.zip"
	    - name: Upload Artifact
	      uses: actions/upload-artifact@v4
	      with:
	        path: ${{ github.workspace }}\\out\\*.zip
	        name: llama-box-windows-${{ matrix.arch }}-oneapi-${{ matrix.version }}

	# Runs only for tag refs, after every build job has finished: gathers all
	# uploaded artifacts into one directory and attaches them to a GitHub release.
	release:
	  if: ${{ startsWith(github.ref, 'refs/tags/') }}
	  permissions:
	    contents: write
	    actions: read
	    id-token: write
	  runs-on: ubuntu-22.04
	  needs:
	    - darwin
	    - darwin-metal
	    - linux
	    - linux-hip
	    - linux-cuda
	    - linux-oneapi
	    - linux-cann
	    - linux-musa
	    - windows
	    - windows-hip
	    - windows-cuda
	    - windows-oneapi
	  steps:
	    - name: Download Artifact
	      uses: actions/download-artifact@v4
	      with:
	        path: ${{ github.workspace }}/out
	        # Flatten all artifacts into the single out/ directory.
	        merge-multiple: true
	    - name: Release
	      uses: softprops/action-gh-release@v1
	      with:
	        fail_on_unmatched_files: true
	        # Neither the workflow-level env nor this job defines VERSION, so
	        # env.VERSION would expand to an empty string; name the tag explicitly
	        # from the ref that triggered this tag-only job.
	        tag_name: "${{ github.ref_name }}"
	        # Any ref containing "rc" is published as a pre-release.
	        prerelease: ${{ contains(github.ref, 'rc') }}
	        files: ${{ github.workspace }}/out/*

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

fix: embedding crashing #601

Workflow file

fix: embedding crashing #601

Jobs

Run details

Workflow file for this run