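# Unit Tests workflow: builds the gptqmodel wheel on a self-hosted zen4 runner, then
# fans the suite out across three matrices (torch 2.4.1 GPU tests, transformers-pinned
# model tests, torch 2.5.1 / IPEX tests). Wheels and GPU slots are brokered by an
# internal server ($ZEN4_SERVER); repo, ref, PR number, test selection, artifact reuse
# and parallelism come in as workflow_dispatch inputs.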
name: Unit Tests

defaults:
  run:
    shell: bash -le {0}

on:
  repository_dispatch:
  workflow_dispatch:
    inputs:
      repo:
        description: 'GitHub repo {owner}/{repo}'
        required: false
        default: ''
      ref:
        description: 'GitHub ref: Branch, Tag or Commit SHA'
        required: false
        default: ''
      pr_number:
        description: 'PR Number'
        required: false
        type: number
      test_names:
        description: 'Input Test(s) to Run (default all)'
        required: false
        default: ''
      artifact_id:
        description: 'Run id for artifact to be downloaded'
        required: false
        default: ''
      max-parallel:
        description: 'max parallel jobs'
        required: false
        default: '6'

env:
  CUDA_DEVICE_ORDER: PCI_BUS_ID
  CUDA_VISIBLE_DEVICES: 0
  MAX_JOBS: 4
  ZEN4_SERVER: 10.0.14.248
  TRANSFORMERS_DIFF_TESTS: "models/test_internlm,models/test_internlm2_5,models/test_xverse"
  TORCH_2_5_TESTS: "test_q4_ipex.py,test_save_loaded_quantized_model,test_quant_formats"
  IGNORED_TEST_FILES: "test_tgi.py,test_gptneox.py,models/test_mixtral"
  GPTQMODEL_FORCE_BUILD: 1
  repo: ${{ github.event.inputs.repo || github.repository }}
  ref: ${{ github.event.inputs.ref || github.ref }}
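
# Only one unit-test run per ref + test selection at a time; a newer dispatch cancels
# the one still in flight.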
concurrency:
  group: ${{ github.event.inputs.ref || github.ref }}-workflow-unit-tests-${{ github.event.inputs.test_names }}
  cancel-in-progress: true

jobs:
  check-vm:
    runs-on: self-hosted
    container:
      image: modelcloud/gptqmodel:alpine-ci-v1
    outputs:
      ip: ${{ steps.get_ip.outputs.ip }}
      tag: ${{ steps.get_ip.outputs.tag }}
      run_id: ${{ steps.get_ip.outputs.run_id }}
      max-parallel: ${{ steps.get_ip.outputs.max-parallel }}
    steps:
      - name: Print env
        run: |
          echo "repo: ${{ env.repo }}"
          echo "ref: ${{ env.ref }}"
      - name: Select server
        id: get_ip
        run: |
          echo "ip=$ZEN4_SERVER" >> "$GITHUB_OUTPUT"
          echo "tag=zen4" >> "$GITHUB_OUTPUT"
          echo "GPU_IP=$ZEN4_SERVER" >> $GITHUB_ENV
echo "tag: $tag, ip: $ip" | |
if [ -n "${{ github.event.inputs.artifact_id }}" ]; then | |
run_id="${{ github.event.inputs.artifact_id }}" | |
else | |
run_id="${{ github.run_id }}" | |
fi | |
echo "run_id=$run_id" >> "$GITHUB_OUTPUT" | |
echo "artifact_id=$run_id" | |
          max_p=${{ github.event.inputs.max-parallel }}
          max_p="{\"size\": ${max_p:-6}}"
          echo "max-parallel=$max_p" >> "$GITHUB_OUTPUT"
          echo "max-parallel=$max_p"
  list-test-files:
    runs-on: ubuntu-latest
    outputs:
      torch-2-5-files: ${{ steps.files.outputs.torch-2-5-files }}
      gpu-files: ${{ steps.files.outputs.gpu-files }}
      transformers-files: ${{ steps.files.outputs.transformers-files }}
    steps:
      - name: Checkout Codes
        uses: actions/checkout@v4
        with:
          repository: ${{ env.repo }}
          ref: ${{ env.ref }}
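      # When a PR number is supplied, switch the working tree to that PR's head.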
      - name: Fetch PR by number
        if: ${{ github.event.inputs.pr_number != 0 }}
        run: |
          PR_NUMBER=${{ github.event.inputs.pr_number }}
          echo "pr number $PR_NUMBER"
          git config --global --add safe.directory $(pwd)
          git fetch origin pull/${PR_NUMBER}/head:pr-${PR_NUMBER}
          git checkout pr-${PR_NUMBER}
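      # Inline Python builds the three JSON arrays and prints them on one line,
      # '|'-separated; the shell splits them back apart with read.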
      - name: List files
        id: files
        run: |
          script="
          import json
          import os
          cpu_file_list = [f.strip().removesuffix('.py') for f in '${TORCH_2_5_TESTS}'.split(',') if f.strip()]
          test_files_list = [f.strip().removesuffix('.py') for f in '${{ github.event.inputs.test_names }}'.split(',') if f.strip()]
          cpu_test_files = [f for f in cpu_file_list if not test_files_list or f in test_files_list]
          transformers_diff_list = [f.strip().removesuffix('.py') for f in '${TRANSFORMERS_DIFF_TESTS}'.split(',') if f.strip()]
          transformers_test_files = [f for f in transformers_diff_list if not test_files_list or f in test_files_list]
          all_tests = [f.removesuffix('.py') for f in os.listdir('tests/') if f.startswith('test_') and f.endswith('.py') and f.strip().removesuffix('.py') not in '${IGNORED_TEST_FILES}']
          all_tests_models = ['models/'+f.removesuffix('.py') for f in os.listdir('tests/models') if f.startswith('test_') and f.endswith('.py') and f.strip().removesuffix('.py') not in '${IGNORED_TEST_FILES}']
          all_tests = [item for item in all_tests+all_tests_models if item.strip().removesuffix('.py') not in '${TORCH_2_5_TESTS}']
          gpu_test_files = [f for f in all_tests if f not in cpu_file_list and (not test_files_list or f in test_files_list) and f not in transformers_diff_list]
          print(f'{json.dumps(cpu_test_files)}|{json.dumps(gpu_test_files)}|{json.dumps(transformers_test_files)}')
          "
          test_files=$(python3 -c "$script")
          IFS='|' read -r cpu_test_files gpu_test_files transformers_test_files <<< "$test_files"
          echo "torch-2-5-files=$cpu_test_files" >> "$GITHUB_OUTPUT"
          echo "gpu-files=$gpu_test_files" >> "$GITHUB_OUTPUT"
          echo "transformers-files=$transformers_test_files" >> "$GITHUB_OUTPUT"
          echo "Test files: $test_files"
          echo "CPU Test files: $cpu_test_files"
          echo "GPU Test files: $gpu_test_files"
          echo "Transformers Test files: $transformers_test_files"
          echo "Ignored Test files: $IGNORED_TEST_FILES"
  build-zen4:
    runs-on: [self-hosted, zen4]
    needs: check-vm
    if: needs.check-vm.outputs.tag == 'zen4' && github.event.inputs.artifact_id == '' && !cancelled()
    container:
      image: ${{ needs.check-vm.outputs.ip }}:5000/modelcloud/gptqmodel:github-ci-v2-torch2.4.1
    steps:
      - name: Checkout Codes
        uses: actions/checkout@v4
        with:
          repository: ${{ env.repo }}
          ref: ${{ env.ref }}
      - name: Fetch PR by number
        if: ${{ github.event.inputs.pr_number != 0 }}
        run: |
          PR_NUMBER=${{ github.event.inputs.pr_number }}
          echo "pr number $PR_NUMBER"
          git config --global --add safe.directory $(pwd)
          git fetch origin pull/${PR_NUMBER}/head:pr-${PR_NUMBER}
          git checkout pr-${PR_NUMBER}
      - name: Print Env
        run: |
          echo "== pyenv =="
          pyenv versions
          echo "== python =="
          python --version
          echo "== nvcc =="
          nvcc --version
          echo "== torch =="
          pip show torch
      - name: Install requirements
        run: |
          bash -c "$(curl -L http://$ZEN4_SERVER/scripts/compiler/init_env.sh)" @ 12.4 2.4.1 3.11
          uv pip install transformers -U
      - name: Compile
        timeout-minutes: 35
        run: python setup.py bdist_wheel
      - name: Test install
        run: |
          ls -ahl dist
          whl=$(ls -t dist/*.whl | head -n 1 | xargs basename)
          sha256=$(sha256sum dist/$whl)
          echo "hash=$sha256"
          echo "WHL_HASH=$sha256" >> $GITHUB_ENV
          echo "WHL_NAME=$whl" >> $GITHUB_ENV
          twine check dist/$whl
          uv pip install dist/$whl
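      # Cache the wheel on the internal server keyed by run id so the test jobs can
      # fetch it without the artifact API; best effort only (continue-on-error).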
      - name: Upload wheel
        continue-on-error: true
        run: |
          curl -s -F "runid=${{ github.run_id }}" -F "repo=${{ env.repo }}" -F "ref=${{ env.ref }}" -F "sha256=${{ env.WHL_HASH }}" -F "file=@dist/${{ env.WHL_NAME }}" http://${{ needs.check-vm.outputs.ip }}/gpu/whl/upload
      - name: Upload to artifact
        uses: actions/upload-artifact@v4
        with:
          name: dist
          path: dist
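
  # Main GPU matrix: one job per file in gpu-files, run against torch 2.4.1.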
  torch2_4:
    needs:
      - build-zen4
      - list-test-files
      - check-vm
    runs-on: self-hosted
    if: always() && !cancelled() && (needs.build-zen4.result == 'success' || github.event.inputs.artifact_id != '')
    container:
      image: ${{ needs.check-vm.outputs.ip }}:5000/modelcloud/gptqmodel:github-ci-v2-torch2.4.1
      volumes:
        - /home/ci/models:/monster/data/model
    strategy:
      fail-fast: false
      max-parallel: ${{ fromJson(needs.check-vm.outputs.max-parallel).size || 6 }}
      matrix:
        test_script: ${{ fromJSON(needs.list-test-files.outputs.gpu-files) }}
    steps:
      - name: Checkout Codes
        uses: actions/checkout@v4
        with:
          repository: ${{ env.repo }}
          ref: ${{ env.ref }}
      - name: Fetch PR by number
        if: ${{ github.event.inputs.pr_number != 0 }}
        run: |
          PR_NUMBER=${{ github.event.inputs.pr_number }}
          echo "pr number $PR_NUMBER"
          git config --global --add safe.directory $(pwd)
          git fetch origin pull/${PR_NUMBER}/head:pr-${PR_NUMBER}
          git checkout pr-${PR_NUMBER}
      - name: Print Env
        run: |
          echo "== pyenv =="
          pyenv versions
          echo "== python =="
          python --version
          echo "== nvcc =="
          nvcc --version
          echo "== torch =="
          pip show torch
          if [ -n "${{ github.event.inputs.artifact_id }}" ]; then
            run_id="${{ github.event.inputs.artifact_id }}"
          else
            run_id="${{ github.run_id }}"
          fi
          echo "RUN_ID=$run_id" >> $GITHUB_ENV
          echo "RUN_ID=${run_id}"
if grep -q "bitblas" tests/${{ matrix.test_script }}.py; then | |
echo "BITBLAS=1" >> $GITHUB_ENV | |
fi | |
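      # Prefer the wheel cached on the internal server; if that lookup fails,
      # DOWNLOADED stays unset and the GitHub artifact is used instead.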
      - name: Download wheel
        continue-on-error: true
        run: |
          file_name=$(curl -s -F "runid=${{ needs.check-vm.outputs.run_id }}" -F "repo=${{ env.repo }}" -F "ref=${{ env.ref }}" -F "fuzz=1" "http://${{ needs.check-vm.outputs.ip }}/gpu/whl/download")
          if echo "$file_name" | grep -q "gptqmodel"; then
            mkdir dist || true
            cd dist
            curl -s -O http://${{ needs.check-vm.outputs.ip }}/whl/${{ env.repo }}/${{ needs.check-vm.outputs.run_id }}/$file_name
            ls -ahl .
            sha256=$(sha256sum $file_name)
            echo "sha256=$sha256"
            echo "DOWNLOADED=1" >> $GITHUB_ENV
          fi
      - name: Download artifact
        if: env.DOWNLOADED == '' && !cancelled()
        uses: actions/download-artifact@v4
        with:
          name: dist
          path: dist
          run-id: ${{ needs.check-vm.outputs.run_id }}
      - name: Install wheel
        run: |
          uv pip install auto_round optimum bitblas==0.0.1.dev13 parameterized uvicorn -i http://${{ needs.check-vm.outputs.ip }}/simple/ --trusted-host ${{ needs.check-vm.outputs.ip }}
          uv pip install dist/*.whl
          bash -c "$(curl -L http://$ZEN4_SERVER/scripts/compiler/init_env.sh)" @ 12.4 2.4.1 3.11
          uv pip install transformers numpy==1.26.4 -U -i http://${{ needs.check-vm.outputs.ip }}/simple/ --trusted-host ${{ needs.check-vm.outputs.ip }}
      - name: Check platform
        run: |
          ip=${ZEN4_SERVER}
          echo "GPU_IP=$ip" >> $GITHUB_ENV
          echo "-----------"
          pip show torch
          echo "-----------"
          nvcc --version
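      # Poll the GPU broker until it hands out a device index, then pin this job to it
      # via CUDA_VISIBLE_DEVICES; the same timestamp is passed back when the GPU is released.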
      - name: Find suitable GPU
        run: |
          timestamp=$(date +%s%3N)
          gpu_id=-1
          while [ "$gpu_id" -lt 0 ]; do
            gpu_id=$(curl -s "http://${{ env.GPU_IP }}/gpu/get?id=${{ github.run_id }}&timestamp=$timestamp")
            if [ "$gpu_id" -lt 0 ]; then
              echo "http://${{ env.GPU_IP }}/gpu/get?id=${{ github.run_id }}&timestamp=$timestamp returned $gpu_id"
              echo "No available GPU, waiting 5 seconds..."
              sleep 5
            else
              echo "Allocated GPU ID: $gpu_id"
            fi
          done
          echo "CUDA_VISIBLE_DEVICES=$gpu_id" >> $GITHUB_ENV
          echo "STEP_TIMESTAMP=$timestamp" >> $GITHUB_ENV
          echo "CUDA_VISIBLE_DEVICES set to $gpu_id, timestamp=$timestamp"
      - name: Run tests
        if: ${{ (!github.event.inputs.test_names || contains(github.event.inputs.test_names, matrix.test_script)) && !cancelled() }}
        run: pytest --durations=0 tests/${{ matrix.test_script }}.py || { echo "ERROR=1" >> $GITHUB_ENV; exit 1; }
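      # On a bitblas test failure, drop the tuned-kernel cache (presumably so a bad
      # cache on this self-hosted runner cannot poison later runs).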
      - name: Clear cache
        if: failure() && env.BITBLAS == '1' && env.ERROR == '1'
        run: |
          rm -rf ~/.cache/bitblas/nvidia/geforce-rtx-4090
          echo "clear bitblas cache"
      - name: Release GPU
        if: always()
        run: curl -X GET "http://${{ env.GPU_IP }}/gpu/release?id=${{ github.run_id }}&gpu=${{ env.CUDA_VISIBLE_DEVICES }}&timestamp=${{ env.STEP_TIMESTAMP }}"
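
  # Model tests that need an older transformers API surface: same flow as torch2_4,
  # but with transformers pinned to 4.38.2 (and tokenizers 0.15.2 for test_xverse).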
  transformers_diff:
    needs:
      - build-zen4
      - list-test-files
      - check-vm
    runs-on: self-hosted
    if: always() && !cancelled() && (needs.build-zen4.result == 'success' || github.event.inputs.artifact_id != '') && needs.list-test-files.outputs.transformers-files != ''
    container:
      image: ${{ needs.check-vm.outputs.ip }}:5000/modelcloud/gptqmodel:github-ci-v2-torch2.4.1
      volumes:
        - /home/ci/models:/monster/data/model
    strategy:
      fail-fast: false
      max-parallel: ${{ fromJson(needs.check-vm.outputs.max-parallel).size || 6 }}
      matrix:
        test_script: ${{ fromJSON(needs.list-test-files.outputs.transformers-files) }}
    steps:
      - name: Checkout Codes
        uses: actions/checkout@v4
        with:
          repository: ${{ env.repo }}
          ref: ${{ env.ref }}
      - name: Fetch PR by number
        if: ${{ github.event.inputs.pr_number != 0 }}
        run: |
          PR_NUMBER=${{ github.event.inputs.pr_number }}
          echo "pr number $PR_NUMBER"
          git config --global --add safe.directory $(pwd)
          git fetch origin pull/${PR_NUMBER}/head:pr-${PR_NUMBER}
          git checkout pr-${PR_NUMBER}
      - name: Print Env
        run: |
          echo "== pyenv =="
          pyenv versions
          echo "== python =="
          python --version
          echo "== nvcc =="
          nvcc --version
          echo "== torch =="
          pip show torch
          ip=${{ needs.check-vm.outputs.ip }}
          echo "GPU_IP=$ip" >> $GITHUB_ENV
      - name: Download wheel
        continue-on-error: true
        run: |
          file_name=$(curl -s -F "runid=${{ needs.check-vm.outputs.run_id }}" -F "repo=${{ env.repo }}" -F "ref=${{ env.ref }}" -F "fuzz=1" "http://${{ needs.check-vm.outputs.ip }}/gpu/whl/download")
          if echo "$file_name" | grep -q "gptqmodel"; then
            mkdir dist || true
            cd dist
            curl -s -O http://${{ needs.check-vm.outputs.ip }}/whl/${{ env.repo }}/${{ needs.check-vm.outputs.run_id }}/$file_name
            ls -ahl .
            sha256=$(sha256sum $file_name)
            echo "sha256=$sha256"
            echo "DOWNLOADED=1" >> $GITHUB_ENV
          fi
      - name: Download artifact
        if: env.DOWNLOADED == '' && !cancelled()
        uses: actions/download-artifact@v4
        with:
          name: dist
          path: dist
          run-id: ${{ needs.check-vm.outputs.run_id }}
      - name: Install wheel
        run: |
          uv pip install auto_round optimum bitblas==0.0.1.dev13 parameterized uvicorn -i http://${{ needs.check-vm.outputs.ip }}/simple/ --trusted-host ${{ needs.check-vm.outputs.ip }}
          uv pip install dist/*.whl
          bash -c "$(curl -L http://$ZEN4_SERVER/scripts/compiler/init_env.sh)" @ 12.4 2.4.1 3.11
          uv pip install transformers==4.38.2 numpy==1.26.4 -U -i http://${{ needs.check-vm.outputs.ip }}/simple/ --trusted-host ${{ needs.check-vm.outputs.ip }}
if [ "${{ matrix.test_script }}" = "test_xverse" ]; then | |
uv pip install tokenizers==0.15.2 | |
fi | |
      - name: Find suitable GPU
        run: |
          timestamp=$(date +%s%3N)
          gpu_id=-1
          while [ "$gpu_id" -lt 0 ]; do
            gpu_id=$(curl -s "http://${{ env.GPU_IP }}/gpu/get?id=${{ github.run_id }}&timestamp=$timestamp")
            if [ "$gpu_id" -lt 0 ]; then
              echo "http://${{ env.GPU_IP }}/gpu/get?id=${{ github.run_id }}&timestamp=$timestamp returned $gpu_id"
              echo "No available GPU, waiting 5 seconds..."
              sleep 5
            else
              echo "Allocated GPU ID: $gpu_id"
            fi
          done
          echo "CUDA_VISIBLE_DEVICES=$gpu_id" >> $GITHUB_ENV
          echo "STEP_TIMESTAMP=$timestamp" >> $GITHUB_ENV
          echo "CUDA_VISIBLE_DEVICES set to $gpu_id, timestamp=$timestamp"
      - name: Run tests
        if: ${{ (!github.event.inputs.test_names || contains(github.event.inputs.test_names, matrix.test_script)) && !cancelled() }}
        run: pytest --durations=0 tests/${{ matrix.test_script }}.py || { echo "ERROR=1" >> $GITHUB_ENV; exit 1; }
      - name: Release GPU
        if: always()
        run: curl -X GET "http://${{ env.GPU_IP }}/gpu/release?id=${{ github.run_id }}&gpu=${{ env.CUDA_VISIBLE_DEVICES }}&timestamp=${{ env.STEP_TIMESTAMP }}"
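
  # Torch 2.5.1 matrix for the IPEX/format tests in TORCH_2_5_TESTS; installs
  # intel_extension_for_pytorch and skips the GPU-broker allocation step
  # (CUDA_VISIBLE_DEVICES stays at the workflow default of 0).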
  torch2_5:
    needs:
      - build-zen4
      - list-test-files
      - check-vm
    runs-on: self-hosted
    if: always() && !cancelled() && (needs.build-zen4.result == 'success' || github.event.inputs.artifact_id != '') && needs.list-test-files.outputs.torch-2-5-files != '[]'
    container:
      image: ${{ needs.check-vm.outputs.ip }}:5000/modelcloud/gptqmodel:github-ci-v2-torch2.5.1
      volumes:
        - /home/ci/models:/monster/data/model
    strategy:
      fail-fast: false
      max-parallel: ${{ fromJson(needs.check-vm.outputs.max-parallel).size || 6 }}
      matrix:
        test_script: ${{ fromJSON(needs.list-test-files.outputs.torch-2-5-files) }}
    steps:
      - name: Checkout Codes
        uses: actions/checkout@v4
        with:
          repository: ${{ env.repo }}
          ref: ${{ env.ref }}
      - name: Fetch PR by number
        if: ${{ github.event.inputs.pr_number != 0 }}
        run: |
          PR_NUMBER=${{ github.event.inputs.pr_number }}
          echo "pr number $PR_NUMBER"
          git config --global --add safe.directory $(pwd)
          git fetch origin pull/${PR_NUMBER}/head:pr-${PR_NUMBER}
          git checkout pr-${PR_NUMBER}
      - name: Print Env
        run: |
          echo "== pyenv =="
          pyenv versions
          echo "== python =="
          python --version
          echo "== nvcc =="
          nvcc --version
          echo "== torch =="
          pip show torch
          ip=${{ needs.check-vm.outputs.ip }}
          echo "GPU_IP=$ip" >> $GITHUB_ENV
      - name: Download wheel
        continue-on-error: true
        run: |
          file_name=$(curl -s -F "runid=${{ needs.check-vm.outputs.run_id }}" -F "repo=${{ env.repo }}" -F "ref=${{ env.ref }}" -F "fuzz=1" "http://${{ needs.check-vm.outputs.ip }}/gpu/whl/download")
          if echo "$file_name" | grep -q "gptqmodel"; then
            mkdir dist || true
            cd dist
            curl -s -O http://${{ needs.check-vm.outputs.ip }}/whl/${{ env.repo }}/${{ needs.check-vm.outputs.run_id }}/$file_name
            ls -ahl .
            sha256=$(sha256sum $file_name)
            echo "sha256=$sha256"
            echo "DOWNLOADED=1" >> $GITHUB_ENV
          fi
      - name: Download artifact
        if: env.DOWNLOADED == '' && !cancelled()
        uses: actions/download-artifact@v4
        with:
          name: dist
          path: dist
          run-id: ${{ needs.check-vm.outputs.run_id }}
      - name: Install wheel
        run: |
          uv pip install device-smi -i https://pypi.org/simple
          uv pip install intel_extension_for_pytorch auto_round bitblas==0.0.1.dev13 dist/*.whl -i http://${{ needs.check-vm.outputs.ip }}/simple/ --trusted-host ${{ needs.check-vm.outputs.ip }}
      - name: Run tests
        if: ${{ (!github.event.inputs.test_names || contains(github.event.inputs.test_names, matrix.test_script)) && !cancelled() }}
        run: pytest --durations=0 tests/${{ matrix.test_script }}.py
      - name: Release GPU
        if: always()
        run: curl -X GET "http://${{ env.GPU_IP }}/gpu/release?id=${{ github.run_id }}&gpu=${{ env.CUDA_VISIBLE_DEVICES }}&timestamp=${{ env.STEP_TIMESTAMP }}"