daily_run_test #488

Workflow file for this run

.github/workflows/daily-run-test.yml at ebefffe

	name: daily_run_test

	on:
	workflow_dispatch:
	inputs:
	repo_org:
	required: false
	description: 'Tested repository organization name. Default is open-compass/opencompass'
	type: string
	default: 'open-compass/opencompass'
	repo_ref:
	required: false
	description: 'Set branch or tag or commit id. Default is "main"'
	type: string
	default: 'main'
	build_lmdeploy:
	required: false
	description: 'whether to build lmdeploy'
	type: boolean
	default: false
	repo_org_lmdeploy:
	required: false
	description: 'Tested repository organization name. Default is internlm/lmdeploy'
	type: string
	default: 'InternLM/lmdeploy'
	repo_ref_lmdeploy:
	required: false
	description: 'Set branch or tag or commit id. Default is "main"'
	type: string
	default: 'main'
	regression_func:
	required: true
	description: 'regression functions'
	type: string
	default: "['chat_models','base_models', 'chat_obj_fullbench', 'chat_sub_fullbench', 'base_fullbench','cmd', 'api']"
	cuda_env:
	required: true
	description: "regression conda env, eg. ['dsw_cu11','dsw_cu12']"
	type: string
	default: "['dsw_cu12']"
	fullbench_eval:
	required: true
	description: 'fullbench volc functions'
	type: string
	default: "['base_long_context','base_objective','chat_long_context','chat_objective','chat_subjective']"
	schedule:
	- cron: '15 14 * * *'

	env:
	HF_DATASETS_OFFLINE: 1
	HF_EVALUATE_OFFLINE: 1
	TRANSFORMERS_OFFLINE: 1
	VLLM_USE_MODELSCOPE: false
	LMDEPLOY_USE_MODELSCOPE: false
	HF_HUB_OFFLINE: 1
	OUTPUT_FOLDER: cuda12.1_dist_${{ github.run_id }}

	jobs:
	build-pypi:
	runs-on: ubuntu-latest
	steps:
	- uses: actions/checkout@v2
	with:
	repository: ${{ github.event.inputs.repo_org \|\| 'open-compass/opencompass' }}
	ref: ${{github.event.inputs.repo_ref \|\| 'main'}}
	- name: Set up Python 3.x
	uses: actions/setup-python@v2
	with:
	python-version: 3.x
	- name: Build lagent
	run: \|
	pip install wheel setuptools
	python setup.py sdist bdist_wheel
	- name: Upload Artifacts
	uses: actions/upload-artifact@v4
	with:
	if-no-files-found: error
	path: dist/*
	retention-days: 1
	name: my-artifact-${{ github.run_id }}

	build-pypi-lmdeploy:
	if: ${{!cancelled() && (github.event_name != 'schedule' && inputs.build_lmdeploy)}}
	strategy:
	matrix:
	pyver: [py310]
	runs-on: ubuntu-latest
	env:
	PYTHON_VERSION: ${{ matrix.pyver }}
	PLAT_NAME: manylinux2014_x86_64
	DOCKER_TAG: cuda12.1
	steps:
	- name: Checkout repository
	uses: actions/checkout@v3
	with:
	repository: ${{ github.event.inputs.repo_org_lmdeploy \|\| 'InternLM/lmdeploy' }}
	ref: ${{github.event.inputs.repo_ref_lmdeploy \|\| 'main'}}
	- name: Build
	run: \|
	echo ${PYTHON_VERSION}
	echo ${PLAT_NAME}
	echo ${DOCKER_TAG}
	echo ${OUTPUT_FOLDER}
	echo ${GITHUB_RUN_ID}
	# remove -it
	sed -i 's/docker run --rm -it/docker run --rm/g' builder/manywheel/build_wheel.sh
	bash builder/manywheel/build_wheel.sh ${PYTHON_VERSION} ${PLAT_NAME} ${DOCKER_TAG} ${OUTPUT_FOLDER}
	- name: Upload Artifacts
	uses: actions/upload-artifact@v4
	with:
	if-no-files-found: error
	path: builder/manywheel/${{ env.OUTPUT_FOLDER }}
	retention-days: 1
	name: my-artifact-${{ github.run_id }}-${{ matrix.pyver }}


	prepare_env:
	if: ${{!cancelled()}}
	needs: ['build-pypi', 'build-pypi-lmdeploy']
	strategy:
	fail-fast: false
	matrix:
	cuda_env: ${{ fromJSON(inputs.cuda_env \|\| '["dsw_cu12"]')}}
	runs-on: ${{ matrix.cuda_env }}
	env:
	CONDA_ENV: opencompass_regression
	PIP_CACHE_PATH: /cpfs01/user/qa-llm-cicd/.cache/pip
	environment: 'prod'
	timeout-minutes: 240 #4hours
	steps:
	- name: Clone repository
	uses: actions/checkout@v2
	with:
	repository: ${{ github.event.inputs.repo_org \|\| 'open-compass/opencompass' }}
	ref: ${{github.event.inputs.repo_ref \|\| 'main'}}
	- name: Download Artifacts
	uses: actions/download-artifact@v4
	with:
	name: my-artifact-${{ github.run_id }}
	- name: Remove Conda Env
	if: always()
	run: \|
	. /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
	conda env remove -y --name ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
	conda info --envs
	- name: Prepare - create conda env and install torch - cu11
	if: ${{matrix.cuda_env == 'dsw_cu11'}}
	uses: nick-fields/retry@v3
	id: retry1
	with:
	max_attempts: 3
	timeout_minutes: 40
	command: \|
	. /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
	conda create -y --name ${{env.CONDA_ENV}}_${{ matrix.cuda_env }} python=3.10
	conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
	pip install -r /cpfs01/shared/public/qa-llm-cicd/requirements-cu11.txt --cache-dir ${{env.PIP_CACHE_PATH}}
	pip install opencompass*.whl --cache-dir ${{env.PIP_CACHE_PATH}}
	pip install /cpfs01/user/qa-llm-cicd/packages/lmdeploy-0.6.1+cu118-cp310-cp310-manylinux2014_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}}
	pip install /cpfs01/user/qa-llm-cicd/packages/vllm-0.6.1.post1+cu118-cp310-cp310-manylinux1_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}}
	pip uninstall torch torchvision torchaudio -y
	pip install torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1 --cache-dir ${{env.PIP_CACHE_PATH}} --index-url https://download.pytorch.org/whl/cu118
	FLASH_ATTENTION_FORCE_BUILD=TRUE pip install /cpfs01/user/qa-llm-cicd/packages/flash_attn-2.7.0.post2+cu11torch2.5cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
	pip install /cpfs01/user/qa-llm-cicd/packages/xformers-0.0.28.post3-cp310-cp310-manylinux_2_28_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}}
	conda info --envs
	pip list
	- name: Prepare - create conda env and install torch - cu12
	if: ${{matrix.cuda_env == 'dsw_cu12'}}
	uses: nick-fields/retry@v3
	id: retry2
	with:
	max_attempts: 3
	timeout_minutes: 40
	command: \|
	. /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
	conda create -y --name ${{env.CONDA_ENV}}_${{ matrix.cuda_env }} python=3.10
	conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
	pip install -r /cpfs01/shared/public/qa-llm-cicd/requirements-cu12.txt --cache-dir ${{env.PIP_CACHE_PATH}}
	pip install opencompass*.whl --cache-dir ${{env.PIP_CACHE_PATH}}
	pip install opencompass[lmdeploy] --cache-dir ${{env.PIP_CACHE_PATH}}
	pip install opencompass[vllm] --cache-dir ${{env.PIP_CACHE_PATH}}
	pip install torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1 --cache-dir ${{env.PIP_CACHE_PATH}}
	FLASH_ATTENTION_FORCE_BUILD=TRUE pip install /cpfs01/user/qa-llm-cicd/packages/flash_attn-2.7.0.post2+cu12torch2.5cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
	pip install /cpfs01/user/qa-llm-cicd/packages/xformers-0.0.28.post3-cp310-cp310-manylinux_2_28_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}}
	conda info --envs
	pip list
	- name: Prepare - reinstall lmdeploy - cu12
	if: ${{matrix.cuda_env == 'dsw_cu12' && inputs.build_lmdeploy}}
	uses: actions/download-artifact@v4
	with:
	name: my-artifact-${{ github.run_id }}-py310
	- name: Prepare - reinstall lmdeploy - cu12
	if: ${{matrix.cuda_env == 'dsw_cu12' && inputs.build_lmdeploy}}
	run: \|
	. /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
	conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
	pip install lmdeploy-*.whl --no-deps

	daily_run_test:
	if: ${{!cancelled()}}
	needs: prepare_env
	strategy:
	fail-fast: false
	matrix:
	cuda_env: ${{ fromJSON(inputs.cuda_env \|\| '["dsw_cu12"]')}}
	regression_func: ${{fromJSON(github.event.inputs.regression_func \|\| '["chat_models","base_models","chat_obj_fullbench","chat_sub_fullbench","base_fullbench","cmd","api"]')}}
	runs-on: ${{ matrix.cuda_env }}
	env:
	CONDA_ENV: opencompass_regression
	PIP_CACHE_PATH: /cpfs01/user/qa-llm-cicd/.cache/pip
	HF_CACHE_PATH: /cpfs01/shared/public/public_hdd/llmeval/model_weights/hf_hub
	HUGGINGFACE_HUB_CACHE: /cpfs01/shared/public/public_hdd/llmeval/model_weights/hf_hub
	HF_HUB_CACHE: /cpfs01/shared/public/public_hdd/llmeval/model_weights/hf_hub
	COMPASS_DATA_CACHE: /cpfs01/shared/public/llmeval/compass_data_cache
	REPORT_ROOT: /cpfs01/shared/public/qa-llm-cicd/report
	environment: 'prod'
	timeout-minutes: 240 #4hours
	steps:
	- name: Clone repository
	uses: actions/checkout@v2
	with:
	repository: ${{ github.event.inputs.repo_org \|\| 'open-compass/opencompass' }}
	ref: ${{github.event.inputs.repo_ref \|\| 'main'}}
	- name: Prepare - prepare data and hf model
	run: \|
	rm -rf ~/.cache/huggingface/hub -f && mkdir ~/.cache -p && mkdir ~/.cache/huggingface -p
	ln -s ${{env.HF_CACHE_PATH}} ~/.cache/huggingface/hub
	- name: Run command testcase
	if: matrix.regression_func == 'cmd'
	run: \|
	. /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
	conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
	conda info --envs
	export from_tf=TRUE
	python tools/list_configs.py internlm2_5 mmlu
	opencompass --models hf_internlm2_5_7b hf_internlm2_1_8b --datasets race_ppl demo_gsm8k_chat_gen --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/cmd1_${{ matrix.cuda_env }} --reuse --max-num-workers 2 --dump-eval-details
	rm regression_result_daily -f && ln -s ${{env.REPORT_ROOT}}/${{ github.run_id }}/cmd1_${{ matrix.cuda_env }}/*/summary regression_result_daily
	python -m pytest -m case1 -s -v --color=yes .github/scripts/oc_score_assert.py
	opencompass --models hf_internlm2_5_7b_chat hf_internlm2_chat_1_8b --datasets race_gen demo_gsm8k_chat_gen -a lmdeploy --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/cmd2_${{ matrix.cuda_env }} --reuse --max-num-workers 2 --dump-eval-details
	rm regression_result_daily -f && ln -s ${{env.REPORT_ROOT}}/${{ github.run_id }}/cmd2_${{ matrix.cuda_env }}/*/summary regression_result_daily
	python -m pytest -m case2 -s -v --color=yes .github/scripts/oc_score_assert.py
	opencompass --datasets race_ppl demo_gsm8k_chat_gen --hf-type base --hf-path internlm/internlm2_5-7b --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/cmd3_${{ matrix.cuda_env }} --reuse --max-num-workers 2 --dump-eval-details
	rm regression_result_daily -f && ln -s ${{env.REPORT_ROOT}}/${{ github.run_id }}/cmd3_${{ matrix.cuda_env }}/*/summary regression_result_daily
	python -m pytest -m case3 -s -v --color=yes .github/scripts/oc_score_assert.py
	opencompass --datasets race_gen demo_gsm8k_chat_gen --hf-type chat --hf-path internlm/internlm2_5-7b-chat --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/cmd4_${{ matrix.cuda_env }} --reuse --max-num-workers 2 --dump-eval-details
	rm regression_result_daily -f && ln -s ${{env.REPORT_ROOT}}/${{ github.run_id }}/cmd4_${{ matrix.cuda_env }}/*/summary regression_result_daily
	python -m pytest -m case4 -s -v --color=yes .github/scripts/oc_score_assert.py
	- name: Run chat model test
	if: matrix.regression_func == 'chat_models'
	run: \|
	. /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
	conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
	conda info --envs
	opencompass .github/scripts/eval_regression_chat.py --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/chat_${{ matrix.cuda_env }} --reuse --max-num-workers 2 --dump-eval-details
	rm regression_result_daily -f && ln -s ${{env.REPORT_ROOT}}/${{ github.run_id }}/chat_${{ matrix.cuda_env }}/*/summary regression_result_daily
	python -m pytest -m chat -s -v --color=yes .github/scripts/oc_score_assert.py
	- name: Run base model test
	if: matrix.regression_func == 'base_models'
	run: \|
	. /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
	conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
	conda info --envs
	opencompass .github/scripts/eval_regression_base.py --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/base_${{ matrix.cuda_env }} --reuse --max-num-workers 2 --dump-eval-details
	rm regression_result_daily -f && ln -s ${{env.REPORT_ROOT}}/${{ github.run_id }}/base_${{ matrix.cuda_env }}/*/summary regression_result_daily
	python -m pytest -m base -s -v --color=yes .github/scripts/oc_score_assert.py
	- name: Run chat model test - fullbench
	if: matrix.regression_func == 'chat_obj_fullbench'
	run: \|
	. /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
	conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
	conda info --envs
	opencompass .github/scripts/eval_regression_chat_objective_fullbench.py --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/chat_obj_${{ matrix.cuda_env }} --reuse --max-num-workers 2 --dump-eval-details
	rm regression_result_daily -f && ln -s ${{env.REPORT_ROOT}}/${{ github.run_id }}/chat_obj_${{ matrix.cuda_env }}/*/summary regression_result_daily
	python -m pytest -m chat_obj_fullbench -s -v --color=yes .github/scripts/oc_score_assert.py
	- name: Run chat model test - fullbench
	if: matrix.regression_func == 'chat_sub_fullbench'
	env:
	COMPASS_DATA_CACHE: /cpfs01/shared/public/llmeval/compass_data_cache_subset
	run: \|
	. /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
	conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
	conda info --envs
	opencompass .github/scripts/eval_regression_chat_subjective_fullbench.py --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/chat_sub_${{ matrix.cuda_env }} --reuse --max-num-workers 2 --dump-eval-details
	rm regression_result_daily -f && ln -s ${{env.REPORT_ROOT}}/${{ github.run_id }}/chat_sub_${{ matrix.cuda_env }}/*/summary regression_result_daily
	python -m pytest -m chat_sub_fullbench -s -v --color=yes .github/scripts/oc_score_assert.py
	- name: Run base model test - fullbench
	if: matrix.regression_func == 'base_fullbench'
	run: \|
	. /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
	conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
	conda info --envs
	opencompass .github/scripts/eval_regression_base_fullbench.py --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/base_full_${{ matrix.cuda_env }} --reuse --max-num-workers 2 --dump-eval-details
	rm regression_result_daily -f && ln -s ${{env.REPORT_ROOT}}/${{ github.run_id }}/base_full_${{ matrix.cuda_env }}/*/summary regression_result_daily
	python -m pytest -m base_fullbench -s -v --color=yes .github/scripts/oc_score_assert.py
	- name: Run model test - api
	if: matrix.regression_func == 'api'
	run: \|
	. /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
	conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
	conda info --envs
	lmdeploy serve api_server internlm/internlm2_5-7b-chat --max-batch-size 256 --model-name internlm2 > ${{env.REPORT_ROOT}}/${{ github.run_id }}/restful.log 2>&1 &
	echo "restful_pid=$!" >> "$GITHUB_ENV"
	sleep 120s
	opencompass .github/scripts/eval_regression_api.py --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/api_${{ matrix.cuda_env }} --reuse --max-num-workers 2 --dump-eval-details
	rm regression_result_daily -f && ln -s ${{env.REPORT_ROOT}}/${{ github.run_id }}/api_${{ matrix.cuda_env }}/*/summary regression_result_daily
	python -m pytest -m api -s -v --color=yes .github/scripts/oc_score_assert.py
	- name: Run model test - api kill
	if: always() && matrix.regression_func == 'api'
	run: \|
	kill -15 "$restful_pid"

	fullbench_run_test:
	if: ${{!cancelled()}}
	needs: ['build-pypi', 'build-pypi-lmdeploy']
	env:
	FULLBENCH_CONDA_ENV: regression_test
	FULLBENCH_REPORT_ROOT: /fs-computility/llm/qa-llm-cicd/eval_report/regression
	COMPASS_DATA_CACHE: /fs-computility/llm/shared/llmeval/datasets/compass_data_cache
	strategy:
	fail-fast: false
	matrix:
	function_type: ${{fromJSON(github.event.inputs.fullbench_eval \|\| '["base_long_context","base_objective","chat_long_context","chat_objective","chat_subjective"]')}}
	runs-on: volc_cu12
	environment: 'prod'
	timeout-minutes: 360 #6hours
	steps:
	- name: Clone repository
	uses: actions/checkout@v2
	with:
	repository: ${{ github.event.inputs.repo_org \|\| 'open-compass/opencompass' }}
	ref: ${{github.event.inputs.repo_ref \|\| 'main'}}
	- name: Download Artifacts
	uses: actions/download-artifact@v4
	with:
	name: my-artifact-${{ github.run_id }}
	- name: Prepare - reinstall opencompass - cu12
	if: ${{matrix.cuda_env == 'dsw_cu12' && inputs.build_lmdeploy}}
	run: \|
	. /fs-computility/llm/qa-llm-cicd/miniconda3/bin/activate
	conda activate ${{env.FULLBENCH_CONDA_ENV}}
	pip install opencompass*.whl --no-deps
	- name: Prepare - reinstall lmdeploy - cu12
	if: ${{matrix.cuda_env == 'dsw_cu12' && inputs.build_lmdeploy}}
	uses: actions/download-artifact@v4
	with:
	name: my-artifact-${{ github.run_id }}-py310
	- name: Prepare - reinstall lmdeploy - cu12
	if: ${{matrix.cuda_env == 'dsw_cu12' && inputs.build_lmdeploy}}
	run: \|
	. /fs-computility/llm/qa-llm-cicd/miniconda3/bin/activate
	conda activate ${{env.FULLBENCH_CONDA_ENV}}
	pip install lmdeploy-*.whl --no-deps
	- name: Conda env
	if: ${{matrix.cuda_env == 'dsw_cu12' && inputs.build_lmdeploy}}
	run: \|
	. /fs-computility/llm/qa-llm-cicd/miniconda3/bin/activate
	conda activate ${{env.FULLBENCH_CONDA_ENV}}
	conda info --envs
	pip list
	- name: Run command testcase
	run: \|
	. /fs-computility/llm/qa-llm-cicd/miniconda3/bin/activate
	conda activate ${{env.FULLBENCH_CONDA_ENV}}
	conda info --envs
	export from_tf=TRUE
	opencompass /fs-computility/llm/qa-llm-cicd/ocplayground/template/regression/eval_${{ matrix.function_type }}.py --work-dir ${{env.FULLBENCH_REPORT_ROOT}}/${{ github.run_id }}/${{ matrix.function_type }} --reuse
	rm regression_result_daily -f && ln -s ${{env.FULLBENCH_REPORT_ROOT}}/${{ github.run_id }}/${{ matrix.function_type }}/*/summary regression_result_daily
	python -m pytest -m ${{ matrix.function_type }} -s -v --color=yes .github/scripts/oc_score_assert.py


	notify_to_feishu:
	if: ${{ always() && !cancelled() && contains(needs.*.result, 'failure') && (github.ref_name == 'develop' \|\| github.ref_name == 'main') }}
	needs: [daily_run_test, fullbench_run_test]
	environment: 'prod'
	timeout-minutes: 5
	runs-on: self-hosted
	steps:
	- name: notify
	run: \|
	curl -X POST -H "Content-Type: application/json" -d '{"msg_type":"post","content":{"post":{"zh_cn":{"title":"Opencompass- Daily test failed","content":[[{"tag":"text","text":"branch: ${{github.ref_name}}, run action: ${{github.workflow}} failed. "},{"tag":"a","text":"Please click here for details ","href":"https://github.com/'${{ github.repository }}'/actions/runs/'${GITHUB_RUN_ID}'"},{"tag":"at","user_id":"'${{ secrets.USER_ID }}'"}]]}}}}' ${{ secrets.WEBHOOK_URL }}

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

daily_run_test #488

Workflow file

daily_run_test #488

Jobs

Run details

Workflow file for this run