Implement '--keep-split' to quantize model into several shards #2099
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Server build and tests
name: Server

# Triggers: manual dispatch (optionally pinning a commit and enabling the slow
# tests), pushes to master, pull requests, and a nightly schedule. The push and
# pull request triggers are limited to build files, sources and server example
# changes via the `paths` filters below.
on:
  workflow_dispatch: # allows manual triggering
    inputs:
      sha:
        description: 'Commit SHA1 to build'
        required: false
        type: string
      slow_tests:
        description: 'Run slow tests'
        required: true
        type: boolean

  push:
    branches:
      - master
    paths:
      - '.github/workflows/server.yml'
      - '**/CMakeLists.txt'
      - '**/Makefile'
      - '**/*.h'
      - '**/*.hpp'
      - '**/*.c'
      - '**/*.cpp'
      - '**/*.cu'
      - '**/*.swift'
      - '**/*.m'
      - 'examples/server/**.*'

  pull_request_target:
    types: [opened, synchronize, reopened]
    paths:
      - '.github/workflows/server.yml'
      - '**/CMakeLists.txt'
      - '**/Makefile'
      - '**/*.h'
      - '**/*.hpp'
      - '**/*.c'
      - '**/*.cpp'
      - '**/*.cu'
      - '**/*.swift'
      - '**/*.m'
      - 'examples/server/**.*'

  schedule:
    - cron: '2 4 * * *'

# pull_request_target runs in the context of the base repository while the
# Linux job checks out and executes the pull request's head commit, so the
# token is restricted to read-only access. The jobs only clone, build and test.
permissions:
  contents: read

concurrency:
  # Pull request runs are grouped per pull request so that a new push cancels
  # only the previous run of the same PR. Under pull_request_target github.ref
  # is the base branch, so it cannot be used to tell pull requests apart.
  # Every other event falls back to the unique run id and is never cancelled.
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.run_id }}
  cancel-in-progress: true
jobs:
  # Linux: builds the server target inside an ubuntu container for every
  # matrix entry and runs the integration test scripts against it.
  server:
    runs-on: ubuntu-latest

    strategy:
      matrix:
        # TODO: temporary disabled due to linux kernel issues
        #sanitizer: [ADDRESS, THREAD, UNDEFINED]
        sanitizer: [UNDEFINED]
        build_type: [Debug]
        # Extra Release entry built without a sanitizer.
        include:
          - build_type: Release
            sanitizer: ""
      fail-fast: false # While -DLLAMA_SANITIZE_THREAD=ON is broken

    container:
      image: ubuntu:latest
      ports:
        - 8888
      options: --cpus 4

    steps:
      - name: Dependencies
        id: depends
        run: |
          apt-get update
          apt-get -y install \
            build-essential \
            xxd \
            git \
            cmake \
            python3-pip \
            curl \
            wget \
            language-pack-en \
            libcurl4-openssl-dev

      - name: Clone
        id: checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          # First non-empty value wins: the manually requested SHA, then the
          # pull request head commit, then the commit/ref of the event.
          ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}

      # Runs deps.sh and fails if it leaves untracked or modified files in
      # examples/server, i.e. if its output differs from what is committed.
      - name: Verify server deps
        id: verify_server_deps
        run: |
          git config --global --add safe.directory $(realpath .)
          cd examples/server
          git ls-files --others --modified
          git status
          ./deps.sh
          git status
          not_ignored_files="$(git ls-files --others --modified)"
          echo "Modified files: ${not_ignored_files}"
          if [ -n "${not_ignored_files}" ]; then
            echo "Repository is dirty or server deps are not built as expected"
            echo "${not_ignored_files}"
            exit 1
          fi

      - name: Build
        id: cmake_build
        # For the Release entry matrix.sanitizer is empty, so the last option
        # expands to -DLLAMA_SANITIZE_=ON (presumably ignored by CMake as an
        # unused variable - TODO confirm).
        run: |
          mkdir build
          cd build
          cmake .. \
              -DLLAMA_NATIVE=OFF \
              -DLLAMA_BUILD_SERVER=ON \
              -DLLAMA_CURL=ON \
              -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
              -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON ;
          cmake --build . --config ${{ matrix.build_type }} -j $(nproc) --target server

      - name: Tests dependencies
        id: test_dependencies
        run: |
          pip install -r examples/server/tests/requirements.txt

      - name: Tests
        id: server_integration_tests
        # NOTE(review): no matrix entry in this job sets disabled_on_pr, so
        # this condition is currently always true.
        if: ${{ !matrix.disabled_on_pr || !github.event.pull_request }}
        run: |
          cd examples/server/tests
          PORT=8888 ./tests.sh

      - name: Slow tests
        id: server_integration_tests_slow
        # Only on scheduled runs or when requested manually, and only for the
        # Release build.
        if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
        run: |
          cd examples/server/tests
          PORT=8888 ./tests.sh --stop --no-skipped --no-capture --tags slow

  # Windows: builds the server target in Release mode against a downloaded
  # libcurl package and runs the behave test suite directly.
  server-windows:
    runs-on: windows-latest

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          # Same ref selection as the Linux job. Without it a
          # pull_request_target run checks out the base branch instead of the
          # pull request's head commit.
          ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}

      - name: libCURL
        id: get_libcurl
        env:
          CURL_VERSION: '8.6.0_6'
        run: |
          curl.exe -o $env:RUNNER_TEMP/curl.zip -L "https://curl.se/windows/dl-${env:CURL_VERSION}/curl-${env:CURL_VERSION}-win64-mingw.zip"
          mkdir $env:RUNNER_TEMP/libcurl
          tar.exe -xvf $env:RUNNER_TEMP/curl.zip --strip-components=1 -C $env:RUNNER_TEMP/libcurl

      - name: Build
        id: cmake_build
        run: |
          mkdir build
          cd build
          cmake .. -DLLAMA_CURL=ON -DCURL_LIBRARY="$env:RUNNER_TEMP/libcurl/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:RUNNER_TEMP/libcurl/include"
          cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS} --target server

      - name: Python setup
        id: setup_python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Tests dependencies
        id: test_dependencies
        run: |
          pip install -r examples/server/tests/requirements.txt

      # Places the libcurl DLL next to the Release binaries built above.
      - name: Copy Libcurl
        id: prepare_libcurl
        run: |
          cp $env:RUNNER_TEMP/libcurl/bin/libcurl-x64.dll ./build/bin/Release/libcurl-x64.dll

      - name: Tests
        id: server_integration_tests
        run: |
          cd examples/server/tests
          behave.exe --summary --stop --no-capture --exclude 'issues|wrong_usages|passkey' --tags llama.cpp

      - name: Slow tests
        id: server_integration_tests_slow
        # Only on scheduled runs or when requested manually. This job has no
        # matrix and always builds Release, so there is no build_type check.
        if: ${{ github.event.schedule || github.event.inputs.slow_tests == 'true' }}
        run: |
          cd examples/server/tests
          behave.exe --stop --no-skipped --no-capture --tags slow