# Unit Tests #546 - workflow file for this run.
# Viewer notice: this file contains bidirectional Unicode text that may be
# interpreted or compiled differently than what appears below. To review, open
# the file in an editor that reveals hidden Unicode characters.
# Unit test pipeline: selects the build/test server, builds a wheel, lists the
# test files, then runs them as matrix jobs in torch 2.4.1 / 2.5.1 containers.
name: Unit Tests

# Every `run` step uses a login bash shell (-l) that aborts on the first
# failing command (-e).
defaults:
  run:
    shell: bash -le {0}

on:
  repository_dispatch:
  workflow_dispatch:
    inputs:
      repo:
        description: 'GitHub repo {owner}/{repo}'
        required: false
        default: ''
      ref:
        description: 'GitHub ref: Branch, Tag or Commit SHA'
        required: false
        default: ''
      pr_number:
        description: 'PR Number'
        required: false
        type: number
      test_names:
        description: 'Input Test(s) to Run (default all)'
        required: false
        default: ''
      artifact_id:
        description: 'Run id for artifact to be downloaded'
        required: false
        default: ''
      max-parallel:
        description: 'max parallel jobs'
        required: false
        default: '6'

# Environment values are always delivered to steps as strings, so
# number-looking values are quoted to make that explicit.
env:
  CUDA_DEVICE_ORDER: PCI_BUS_ID
  # Default device; GPU jobs overwrite this once a GPU has been allocated.
  CUDA_VISIBLE_DEVICES: "0"
  MAX_JOBS: "4"
  ZEN4_SERVER: "10.0.14.248"
  # Comma-separated test names (relative to tests/, optional .py suffix).
  TRANSFORMERS_DIFF_TESTS: "models/test_internlm,models/test_internlm2_5,models/test_xverse"
  TORCH_2_5_TESTS: "test_q4_ipex.py,test_save_loaded_quantized_model,test_quant_formats"
  IGNORED_TEST_FILES: "test_tgi.py,test_gptneox.py,models/test_mixtral"
  GPTQMODEL_FORCE_BUILD: "1"
  # Manual inputs win; otherwise fall back to the repository/ref of the event.
  repo: ${{ github.event.inputs.repo || github.repository }}
  ref: ${{ github.event.inputs.ref || github.ref }}

# One active run per ref + requested test selection; a newer run cancels the
# older one.
concurrency:
  group: ${{ github.event.inputs.ref || github.ref }}-workflow-unit-tests-${{ github.event.inputs.test_names }}
  cancel-in-progress: true

jobs:
# Job: check-vm - publishes the server address, its tag, the run id whose wheel
# should be used, and the matrix parallelism limit for the downstream jobs.
  check-vm:
    runs-on: self-hosted
    container:
      image: modelcloud/gptqmodel:alpine-ci-v1
    outputs:
      ip: ${{ steps.get_ip.outputs.ip }}
      tag: ${{ steps.get_ip.outputs.tag }}
      run_id: ${{ steps.get_ip.outputs.run_id }}
      max-parallel: ${{ steps.get_ip.outputs.max-parallel }}
    steps:
      - name: Print env
        run: |
          echo "repo: ${{ env.repo }}"
          echo "ref: ${{ env.ref }}"

      - name: Select server
        id: get_ip
        # Dispatch inputs are passed through the environment instead of being
        # interpolated into the script text, so their content cannot be
        # executed as shell code.
        env:
          ARTIFACT_ID: ${{ github.event.inputs.artifact_id }}
          MAX_PARALLEL: ${{ github.event.inputs.max-parallel }}
        run: |
          # Shell variables are defined first so the log line below prints the
          # selected values rather than empty strings.
          ip="$ZEN4_SERVER"
          tag="zen4"
          echo "ip=$ip" >> "$GITHUB_OUTPUT"
          echo "tag=$tag" >> "$GITHUB_OUTPUT"
          echo "GPU_IP=$ip" >> "$GITHUB_ENV"
          echo "tag: $tag, ip: $ip"

          # Reuse the wheel of an earlier run when artifact_id is given,
          # otherwise use the wheel built by this run.
          if [ -n "$ARTIFACT_ID" ]; then
            run_id="$ARTIFACT_ID"
          else
            run_id="${{ github.run_id }}"
          fi
          echo "run_id=$run_id" >> "$GITHUB_OUTPUT"
          echo "artifact_id=$run_id"

          # Emitted as a small JSON object; consumers read it with
          # fromJson(...).size. Defaults to 6 when the input is empty.
          max_p="{\"size\": ${MAX_PARALLEL:-6}}"
          echo "max-parallel=$max_p" >> "$GITHUB_OUTPUT"
          echo "max-parallel=$max_p"
# Job: list-test-files - splits the repository's test files into three JSON
# lists (torch 2.5, GPU, pinned-transformers) used as job matrices.
  list-test-files:
    runs-on: ubuntu-latest
    outputs:
      torch-2-5-files: ${{ steps.files.outputs.torch-2-5-files }}
      gpu-files: ${{ steps.files.outputs.gpu-files }}
      transformers-files: ${{ steps.files.outputs.transformers-files }}
    steps:
      - name: Checkout Codes
        uses: actions/checkout@v4
        with:
          repository: ${{ env.repo }}
          ref: ${{ env.ref }}

      - name: Fetch PR by number
        if: ${{ github.event.inputs.pr_number != 0 }}
        run: |
          PR_NUMBER=${{ github.event.inputs.pr_number }}
          echo "pr number $PR_NUMBER"
          git config --global --add safe.directory "$(pwd)"
          git fetch origin "pull/${PR_NUMBER}/head:pr-${PR_NUMBER}"
          git checkout "pr-${PR_NUMBER}"

      - name: List files
        id: files
        # The requested test names reach Python through the environment rather
        # than being pasted into the script source.
        env:
          TEST_NAMES: ${{ github.event.inputs.test_names }}
        run: |
          # Prints the three lists as JSON joined by '|':
          #   torch-2.5 tests | GPU tests | transformers-diff tests
          test_files=$(python3 - <<'PY'
          import json
          import os


          def split_names(value):
              # Comma-separated names -> list without blanks or the .py suffix.
              return [name.strip().removesuffix('.py') for name in value.split(',') if name.strip()]


          def discover(directory, prefix=''):
              # test_*.py files of one directory, suffix removed; sorted so the
              # matrix order does not depend on directory listing order.
              return sorted(
                  prefix + name.removesuffix('.py')
                  for name in os.listdir(directory)
                  if name.startswith('test_') and name.endswith('.py')
              )


          cpu_file_list = split_names(os.environ.get('TORCH_2_5_TESTS', ''))
          transformers_diff_list = split_names(os.environ.get('TRANSFORMERS_DIFF_TESTS', ''))
          ignored = set(split_names(os.environ.get('IGNORED_TEST_FILES', '')))
          test_files_list = split_names(os.environ.get('TEST_NAMES', ''))


          def selected(name):
              # An empty selection means every test is selected.
              return not test_files_list or name in test_files_list


          def is_ignored(name):
              # Exact-name matching (not substring matching), so a test whose
              # name is merely contained in a listed entry is kept. Files under
              # models/ may be listed with or without the models/ prefix.
              return name in ignored or name.removeprefix('models/') in ignored


          cpu_test_files = [f for f in cpu_file_list if selected(f)]
          transformers_test_files = [f for f in transformers_diff_list if selected(f)]

          all_tests = discover('tests/') + discover('tests/models', 'models/')
          handled_elsewhere = set(cpu_file_list) | set(transformers_diff_list)
          gpu_test_files = [
              f for f in all_tests
              if not is_ignored(f) and f not in handled_elsewhere and selected(f)
          ]

          print('|'.join(json.dumps(group) for group in (cpu_test_files, gpu_test_files, transformers_test_files)))
          PY
          )
          IFS='|' read -r cpu_test_files gpu_test_files transformers_test_files <<< "$test_files"
          echo "torch-2-5-files=$cpu_test_files" >> "$GITHUB_OUTPUT"
          echo "gpu-files=$gpu_test_files" >> "$GITHUB_OUTPUT"
          echo "transformers-files=$transformers_test_files" >> "$GITHUB_OUTPUT"
          echo "Test files: $test_files"
          echo "CPU Test files: $cpu_test_files"
          echo "GPU Test files: $gpu_test_files"
          echo "Transformers Test files: $transformers_test_files"
          echo "Ignored Test files: $IGNORED_TEST_FILES"
# Job: build-zen4 - compiles the wheel, smoke-installs it, uploads it to the
# build server and also stores it as the `dist` workflow artifact.
  build-zen4:
    runs-on: [self-hosted, zen4]
    needs: check-vm
    # Skipped when an existing artifact_id was supplied (nothing to build).
    if: needs.check-vm.outputs.tag == 'zen4' && github.event.inputs.artifact_id == '' && !cancelled()
    container:
      image: ${{ needs.check-vm.outputs.ip }}:5000/modelcloud/gptqmodel:github-ci-v2-torch2.4.1
    steps:
      - name: Checkout Codes
        uses: actions/checkout@v4
        with:
          repository: ${{ env.repo }}
          ref: ${{ env.ref }}

      - name: Fetch PR by number
        if: ${{ github.event.inputs.pr_number != 0 }}
        run: |
          PR_NUMBER=${{ github.event.inputs.pr_number }}
          echo "pr number $PR_NUMBER"
          git config --global --add safe.directory "$(pwd)"
          git fetch origin "pull/${PR_NUMBER}/head:pr-${PR_NUMBER}"
          git checkout "pr-${PR_NUMBER}"

      - name: Print Env
        run: |
          echo "== pyenv =="
          pyenv versions
          echo "== python =="
          python --version
          echo "== nvcc =="
          nvcc --version
          echo "== torch =="
          pip show torch

      - name: Install requirements
        run: |
          # init_env.sh is fetched from the build server and called with the
          # arguments 12.4 2.4.1 3.11 (presumably CUDA / torch / python
          # versions - confirm against the script).
          bash -c "$(curl -L http://$ZEN4_SERVER/scripts/compiler/init_env.sh)" @ 12.4 2.4.1 3.11
          uv pip install transformers -U

      - name: Compile
        timeout-minutes: 35
        run: python setup.py bdist_wheel

      - name: Test install
        run: |
          ls -ahl dist
          # Newest wheel in dist/, file name only.
          whl=$(ls -t dist/*.whl | head -n 1 | xargs basename)
          sha256=$(sha256sum "dist/$whl")
          echo "hash=$sha256"
          # Exported for the upload step below.
          echo "WHL_HASH=$sha256" >> "$GITHUB_ENV"
          echo "WHL_NAME=$whl" >> "$GITHUB_ENV"
          twine check "dist/$whl"
          uv pip install "dist/$whl"

      - name: Upload wheel
        # Best effort: the artifact upload below is the fallback source.
        continue-on-error: true
        run: |
          curl -s -F "runid=${{ github.run_id }}" -F "repo=${{ env.repo }}" -F "ref=${{ env.ref }}" -F "sha256=${{ env.WHL_HASH }}" -F "file=@dist/${{ env.WHL_NAME }}" http://${{ needs.check-vm.outputs.ip }}/gpu/whl/upload

      - name: Upload to artifact
        uses: actions/upload-artifact@v4
        with:
          name: dist
          path: dist
# Job: torch2_4 - runs every GPU test file as its own matrix entry in the
# torch 2.4.1 container, allocating one GPU per entry from the GPU server.
  torch2_4:
    needs:
      - build-zen4
      - list-test-files
      - check-vm
    runs-on: self-hosted
    # always() lets the job run when build-zen4 was skipped because an
    # artifact_id was supplied. The gpu-files checks keep the job from failing
    # on an empty matrix (e.g. when only non-GPU tests were requested).
    if: always() && !cancelled() && (needs.build-zen4.result == 'success' || github.event.inputs.artifact_id != '') && needs.list-test-files.outputs.gpu-files != '' && needs.list-test-files.outputs.gpu-files != '[]'
    container:
      image: ${{ needs.check-vm.outputs.ip }}:5000/modelcloud/gptqmodel:github-ci-v2-torch2.4.1
      volumes:
        - /home/ci/models:/monster/data/model
    strategy:
      fail-fast: false
      max-parallel: ${{ fromJson(needs.check-vm.outputs.max-parallel).size || 6 }}
      matrix:
        test_script: ${{ fromJSON(needs.list-test-files.outputs.gpu-files) }}
    steps:
      - name: Checkout Codes
        uses: actions/checkout@v4
        with:
          # Same repo/ref resolution as the build job, so the tests are taken
          # from the source tree the wheel was built from.
          repository: ${{ env.repo }}
          ref: ${{ env.ref }}

      - name: Fetch PR by number
        if: ${{ github.event.inputs.pr_number != 0 }}
        run: |
          PR_NUMBER=${{ github.event.inputs.pr_number }}
          echo "pr number $PR_NUMBER"
          git config --global --add safe.directory "$(pwd)"
          git fetch origin "pull/${PR_NUMBER}/head:pr-${PR_NUMBER}"
          git checkout "pr-${PR_NUMBER}"

      - name: Print Env
        run: |
          echo "== pyenv =="
          pyenv versions
          echo "== python =="
          python --version
          echo "== nvcc =="
          nvcc --version
          echo "== torch =="
          pip show torch

          if [ -n "${{ github.event.inputs.artifact_id }}" ]; then
            run_id="${{ github.event.inputs.artifact_id }}"
          else
            run_id="${{ github.run_id }}"
          fi
          echo "RUN_ID=$run_id" >> "$GITHUB_ENV"
          echo "RUN_ID=${run_id}"

          # Flag tests that mention bitblas; the "Clear cache" step only runs
          # for them.
          if grep -q "bitblas" "tests/${{ matrix.test_script }}.py"; then
            echo "BITBLAS=1" >> "$GITHUB_ENV"
          fi

      - name: Download wheel
        # Best effort: the artifact download below is the fallback.
        continue-on-error: true
        run: |
          file_name=$(curl -s -F "runid=${{ needs.check-vm.outputs.run_id }}" -F "repo=${{ env.repo }}" -F "ref=${{ env.ref }}" -F "fuzz=1" "http://${{ needs.check-vm.outputs.ip }}/gpu/whl/download")
          # Only proceed when the server answered with a gptqmodel file name.
          if echo "$file_name" | grep -q "gptqmodel"; then
            mkdir -p dist
            cd dist
            curl -s -O "http://${{ needs.check-vm.outputs.ip }}/whl/${{ env.repo }}/${{ needs.check-vm.outputs.run_id }}/$file_name"
            ls -ahl .
            sha256=$(sha256sum "$file_name")
            echo "sha256=$sha256"
            echo "DOWNLOADED=1" >> "$GITHUB_ENV"
          fi

      - name: Download artifact
        if: env.DOWNLOADED == '' && !cancelled()
        uses: actions/download-artifact@v4
        with:
          name: dist
          path: dist
          run-id: ${{ needs.check-vm.outputs.run_id }}

      - name: Install wheel
        run: |
          uv pip install auto_round optimum bitblas==0.0.1.dev13 parameterized uvicorn -i http://${{ needs.check-vm.outputs.ip }}/simple/ --trusted-host ${{ needs.check-vm.outputs.ip }}
          uv pip install dist/*.whl
          bash -c "$(curl -L http://$ZEN4_SERVER/scripts/compiler/init_env.sh)" @ 12.4 2.4.1 3.11
          uv pip install transformers numpy==1.26.4 -U -i http://${{ needs.check-vm.outputs.ip }}/simple/ --trusted-host ${{ needs.check-vm.outputs.ip }}

      - name: Check platform
        run: |
          ip=${ZEN4_SERVER}
          echo "GPU_IP=$ip" >> "$GITHUB_ENV"
          echo "-----------"
          pip show torch
          echo "-----------"
          nvcc --version

      - name: Find suitable GPU
        run: |
          # Millisecond timestamp; sent with both the get and the release call.
          timestamp=$(date +%s%3N)
          gpu_id=-1
          # Poll every 5 seconds until the server returns a non-negative id.
          while [ "$gpu_id" -lt 0 ]; do
            gpu_id=$(curl -s "http://${{ env.GPU_IP }}/gpu/get?id=${{ github.run_id }}&timestamp=$timestamp")
            if [ "$gpu_id" -lt 0 ]; then
              echo "http://${{ env.GPU_IP }}/gpu/get?id=${{ github.run_id }}&timestamp=$timestamp returned $gpu_id"
              echo "No available GPU, waiting 5 seconds..."
              sleep 5
            else
              echo "Allocated GPU ID: $gpu_id"
            fi
          done
          echo "CUDA_VISIBLE_DEVICES=$gpu_id" >> "$GITHUB_ENV"
          echo "STEP_TIMESTAMP=$timestamp" >> "$GITHUB_ENV"
          echo "CUDA_VISIBLE_DEVICES set to $gpu_id, timestamp=$timestamp"

      - name: Run tests
        if: ${{ (!github.event.inputs.test_names || contains(github.event.inputs.test_names, matrix.test_script)) && !cancelled() }}
        run: |
          pytest --durations=0 tests/${{ matrix.test_script }}.py || { echo "ERROR=1" >> "$GITHUB_ENV"; exit 1; }

      - name: Clear cache
        if: failure() && env.BITBLAS == '1' && env.ERROR == '1'
        run: |
          rm -rf ~/.cache/bitblas/nvidia/geforce-rtx-4090
          echo "clear bitblas cache"

      - name: Release GPU
        # Runs even after failures, but only when "Find suitable GPU" recorded
        # an allocation; otherwise the workflow-default device id would be
        # released with an empty timestamp.
        if: always() && env.STEP_TIMESTAMP != ''
        run: |
          curl -X GET "http://${{ env.GPU_IP }}/gpu/release?id=${{ github.run_id }}&gpu=${{ env.CUDA_VISIBLE_DEVICES }}&timestamp=${{ env.STEP_TIMESTAMP }}"
# Job: transformers_diff - runs the tests listed in TRANSFORMERS_DIFF_TESTS
# against transformers==4.38.2 in the torch 2.4.1 container.
  transformers_diff:
    needs:
      - build-zen4
      - list-test-files
      - check-vm
    runs-on: self-hosted
    # The list output is JSON, so "nothing to run" is '[]' (or '' when the
    # listing job produced no output); both are skipped to avoid an empty
    # matrix.
    if: always() && !cancelled() && (needs.build-zen4.result == 'success' || github.event.inputs.artifact_id != '') && needs.list-test-files.outputs.transformers-files != '' && needs.list-test-files.outputs.transformers-files != '[]'
    container:
      image: ${{ needs.check-vm.outputs.ip }}:5000/modelcloud/gptqmodel:github-ci-v2-torch2.4.1
      volumes:
        - /home/ci/models:/monster/data/model
    strategy:
      fail-fast: false
      max-parallel: ${{ fromJson(needs.check-vm.outputs.max-parallel).size || 6 }}
      matrix:
        test_script: ${{ fromJSON(needs.list-test-files.outputs.transformers-files) }}
    steps:
      - name: Checkout Codes
        uses: actions/checkout@v4
        with:
          # Same repo/ref resolution as the build job, so the tests are taken
          # from the source tree the wheel was built from.
          repository: ${{ env.repo }}
          ref: ${{ env.ref }}

      - name: Fetch PR by number
        if: ${{ github.event.inputs.pr_number != 0 }}
        run: |
          PR_NUMBER=${{ github.event.inputs.pr_number }}
          echo "pr number $PR_NUMBER"
          git config --global --add safe.directory "$(pwd)"
          git fetch origin "pull/${PR_NUMBER}/head:pr-${PR_NUMBER}"
          git checkout "pr-${PR_NUMBER}"

      - name: Print Env
        run: |
          echo "== pyenv =="
          pyenv versions
          echo "== python =="
          python --version
          echo "== nvcc =="
          nvcc --version
          echo "== torch =="
          pip show torch
          # Address of the GPU allocation server used by the steps below.
          ip=${{ needs.check-vm.outputs.ip }}
          echo "GPU_IP=$ip" >> "$GITHUB_ENV"

      - name: Download wheel
        # Best effort: the artifact download below is the fallback.
        continue-on-error: true
        run: |
          file_name=$(curl -s -F "runid=${{ needs.check-vm.outputs.run_id }}" -F "repo=${{ env.repo }}" -F "ref=${{ env.ref }}" -F "fuzz=1" "http://${{ needs.check-vm.outputs.ip }}/gpu/whl/download")
          # Only proceed when the server answered with a gptqmodel file name.
          if echo "$file_name" | grep -q "gptqmodel"; then
            mkdir -p dist
            cd dist
            curl -s -O "http://${{ needs.check-vm.outputs.ip }}/whl/${{ env.repo }}/${{ needs.check-vm.outputs.run_id }}/$file_name"
            ls -ahl .
            sha256=$(sha256sum "$file_name")
            echo "sha256=$sha256"
            echo "DOWNLOADED=1" >> "$GITHUB_ENV"
          fi

      - name: Download artifact
        if: env.DOWNLOADED == '' && !cancelled()
        uses: actions/download-artifact@v4
        with:
          name: dist
          path: dist
          run-id: ${{ needs.check-vm.outputs.run_id }}

      - name: Install wheel
        run: |
          uv pip install auto_round optimum bitblas==0.0.1.dev13 parameterized uvicorn -i http://${{ needs.check-vm.outputs.ip }}/simple/ --trusted-host ${{ needs.check-vm.outputs.ip }}
          uv pip install dist/*.whl
          bash -c "$(curl -L http://$ZEN4_SERVER/scripts/compiler/init_env.sh)" @ 12.4 2.4.1 3.11
          uv pip install transformers==4.38.2 numpy==1.26.4 -U -i http://${{ needs.check-vm.outputs.ip }}/simple/ --trusted-host ${{ needs.check-vm.outputs.ip }}
          # Matrix entries carry their directory (e.g. models/test_xverse), so
          # match on the trailing file name rather than the bare name.
          if [[ "${{ matrix.test_script }}" == *test_xverse ]]; then
            uv pip install tokenizers==0.15.2
          fi

      - name: Find suitable GPU
        run: |
          # Millisecond timestamp; sent with both the get and the release call.
          timestamp=$(date +%s%3N)
          gpu_id=-1
          # Poll every 5 seconds until the server returns a non-negative id.
          while [ "$gpu_id" -lt 0 ]; do
            gpu_id=$(curl -s "http://${{ env.GPU_IP }}/gpu/get?id=${{ github.run_id }}&timestamp=$timestamp")
            if [ "$gpu_id" -lt 0 ]; then
              echo "http://${{ env.GPU_IP }}/gpu/get?id=${{ github.run_id }}&timestamp=$timestamp returned $gpu_id"
              echo "No available GPU, waiting 5 seconds..."
              sleep 5
            else
              echo "Allocated GPU ID: $gpu_id"
            fi
          done
          echo "CUDA_VISIBLE_DEVICES=$gpu_id" >> "$GITHUB_ENV"
          echo "STEP_TIMESTAMP=$timestamp" >> "$GITHUB_ENV"
          echo "CUDA_VISIBLE_DEVICES set to $gpu_id, timestamp=$timestamp"

      - name: Run tests
        if: ${{ (!github.event.inputs.test_names || contains(github.event.inputs.test_names, matrix.test_script)) && !cancelled() }}
        run: |
          pytest --durations=0 tests/${{ matrix.test_script }}.py || { echo "ERROR=1" >> "$GITHUB_ENV"; exit 1; }

      - name: Release GPU
        # Runs even after failures, but only when "Find suitable GPU" recorded
        # an allocation; otherwise the workflow-default device id would be
        # released with an empty timestamp.
        if: always() && env.STEP_TIMESTAMP != ''
        run: |
          curl -X GET "http://${{ env.GPU_IP }}/gpu/release?id=${{ github.run_id }}&gpu=${{ env.CUDA_VISIBLE_DEVICES }}&timestamp=${{ env.STEP_TIMESTAMP }}"
# Job: torch2_5 - runs the tests listed in TORCH_2_5_TESTS in the torch 2.5.1
# container. This job has no GPU-allocation step.
  torch2_5:
    needs:
      - build-zen4
      - list-test-files
      - check-vm
    runs-on: self-hosted
    # Skipped when there is nothing to run, so the matrix is never empty.
    if: always() && !cancelled() && (needs.build-zen4.result == 'success' || github.event.inputs.artifact_id != '') && needs.list-test-files.outputs.torch-2-5-files != '' && needs.list-test-files.outputs.torch-2-5-files != '[]'
    container:
      image: ${{ needs.check-vm.outputs.ip }}:5000/modelcloud/gptqmodel:github-ci-v2-torch2.5.1
      volumes:
        - /home/ci/models:/monster/data/model
    strategy:
      fail-fast: false
      max-parallel: ${{ fromJson(needs.check-vm.outputs.max-parallel).size || 6 }}
      matrix:
        test_script: ${{ fromJSON(needs.list-test-files.outputs.torch-2-5-files) }}
    steps:
      - name: Checkout Codes
        uses: actions/checkout@v4
        with:
          repository: ${{ env.repo }}
          ref: ${{ env.ref }}

      - name: Fetch PR by number
        if: ${{ github.event.inputs.pr_number != 0 }}
        run: |
          PR_NUMBER=${{ github.event.inputs.pr_number }}
          echo "pr number $PR_NUMBER"
          git config --global --add safe.directory "$(pwd)"
          git fetch origin "pull/${PR_NUMBER}/head:pr-${PR_NUMBER}"
          git checkout "pr-${PR_NUMBER}"

      - name: Print Env
        run: |
          echo "== pyenv =="
          pyenv versions
          echo "== python =="
          python --version
          echo "== nvcc =="
          nvcc --version
          echo "== torch =="
          pip show torch
          ip=${{ needs.check-vm.outputs.ip }}
          echo "GPU_IP=$ip" >> "$GITHUB_ENV"

      - name: Download wheel
        # Best effort: the artifact download below is the fallback.
        continue-on-error: true
        run: |
          file_name=$(curl -s -F "runid=${{ needs.check-vm.outputs.run_id }}" -F "repo=${{ env.repo }}" -F "ref=${{ env.ref }}" -F "fuzz=1" "http://${{ needs.check-vm.outputs.ip }}/gpu/whl/download")
          # Only proceed when the server answered with a gptqmodel file name.
          if echo "$file_name" | grep -q "gptqmodel"; then
            mkdir -p dist
            cd dist
            curl -s -O "http://${{ needs.check-vm.outputs.ip }}/whl/${{ env.repo }}/${{ needs.check-vm.outputs.run_id }}/$file_name"
            ls -ahl .
            sha256=$(sha256sum "$file_name")
            echo "sha256=$sha256"
            echo "DOWNLOADED=1" >> "$GITHUB_ENV"
          fi

      - name: Download artifact
        if: env.DOWNLOADED == '' && !cancelled()
        uses: actions/download-artifact@v4
        with:
          name: dist
          path: dist
          run-id: ${{ needs.check-vm.outputs.run_id }}

      - name: Install wheel
        run: |
          uv pip install device-smi -i https://pypi.org/simple
          uv pip install intel_extension_for_pytorch auto_round bitblas==0.0.1.dev13 dist/*.whl -i http://${{ needs.check-vm.outputs.ip }}/simple/ --trusted-host ${{ needs.check-vm.outputs.ip }}

      - name: Run tests
        if: ${{ (!github.event.inputs.test_names || contains(github.event.inputs.test_names, matrix.test_script)) && !cancelled() }}
        run: |
          pytest --durations=0 tests/${{ matrix.test_script }}.py

      - name: Release GPU
        # STEP_TIMESTAMP is only set by a GPU-allocation step, which this job
        # does not have; the guard keeps it from releasing the workflow-default
        # device id (0) that it never allocated.
        if: always() && env.STEP_TIMESTAMP != ''
        run: |
          curl -X GET "http://${{ env.GPU_IP }}/gpu/release?id=${{ github.run_id }}&gpu=${{ env.CUDA_VISIBLE_DEVICES }}&timestamp=${{ env.STEP_TIMESTAMP }}"