# daily_run_test #489
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters.
# Regression workflow: started manually (workflow_dispatch) or by the cron
# schedule at the bottom of this trigger block.
name: daily_run_test

on:
  workflow_dispatch:
    inputs:
      repo_org:
        required: false
        description: 'Tested repository organization name. Default is open-compass/opencompass'
        type: string
        default: 'open-compass/opencompass'
      repo_ref:
        required: false
        description: 'Set branch or tag or commit id. Default is "main"'
        type: string
        default: 'main'
      build_lmdeploy:
        required: false
        description: 'whether to build lmdeploy'
        type: boolean
        default: false
      repo_org_lmdeploy:
        required: false
        description: 'Tested repository organization name. Default is InternLM/lmdeploy'
        type: string
        default: 'InternLM/lmdeploy'
      repo_ref_lmdeploy:
        required: false
        description: 'Set branch or tag or commit id. Default is "main"'
        type: string
        default: 'main'
      # The three list inputs below are parsed with fromJSON() when the job
      # matrices are built, so their defaults are written as strict JSON arrays
      # (double-quoted strings) rather than relying on a lenient parser.
      regression_func:
        required: true
        description: 'regression functions'
        type: string
        default: '["chat_models","base_models","chat_obj_fullbench","chat_sub_fullbench","base_fullbench","cmd","api"]'
      cuda_env:
        required: true
        description: 'regression conda env, eg. ["dsw_cu11","dsw_cu12"]'
        type: string
        default: '["dsw_cu12"]'
      fullbench_eval:
        required: true
        description: 'fullbench volc functions'
        type: string
        default: '["base_long_context","base_objective","chat_long_context","chat_objective","chat_subjective"]'
  schedule:
    - cron: '15 14 * * *'
# Workflow-level environment variables, visible to every job in this file.
# Values are quoted so they are unambiguously strings.
env:
  HF_HUB_OFFLINE: '1'
  HF_DATASETS_OFFLINE: '1'
  HF_EVALUATE_OFFLINE: '1'
  TRANSFORMERS_OFFLINE: '1'
  VLLM_USE_MODELSCOPE: 'false'
  LMDEPLOY_USE_MODELSCOPE: 'false'
  # Passed to build_wheel.sh and used as the upload path by build-pypi-lmdeploy.
  OUTPUT_FOLDER: cuda12.1_dist_${{ github.run_id }}
jobs:
  # Builds the sdist and wheel of the repository under test and uploads them
  # as a one-day artifact named my-artifact-<run_id>.
  build-pypi:
    runs-on: ubuntu-latest
    steps:
      # checkout/setup-python are pinned to current majors; the v2 releases
      # target Node runtimes that GitHub has retired.
      - uses: actions/checkout@v4
        with:
          repository: ${{ github.event.inputs.repo_org || 'open-compass/opencompass' }}
          ref: ${{ github.event.inputs.repo_ref || 'main' }}
      - name: Set up Python 3.x
        uses: actions/setup-python@v5
        with:
          python-version: '3.x'
      # Downstream jobs install this artifact via `pip install opencompass*.whl`.
      - name: Build opencompass
        run: |
          pip install wheel setuptools
          python setup.py sdist bdist_wheel
      - name: Upload Artifacts
        uses: actions/upload-artifact@v4
        with:
          if-no-files-found: error
          path: dist/*
          retention-days: 1
          name: my-artifact-${{ github.run_id }}
# Job build-pypi-lmdeploy: builds an lmdeploy wheel with the repository's own
# manywheel script. Skipped on scheduled runs and whenever the build_lmdeploy
# input is not set.
  build-pypi-lmdeploy:
    if: ${{ !cancelled() && github.event_name != 'schedule' && inputs.build_lmdeploy }}
    runs-on: ubuntu-latest
    strategy:
      matrix:
        pyver: [py310]
    env:
      PLAT_NAME: manylinux2014_x86_64
      DOCKER_TAG: cuda12.1
      PYTHON_VERSION: ${{ matrix.pyver }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          ref: ${{ github.event.inputs.repo_ref_lmdeploy || 'main' }}
          repository: ${{ github.event.inputs.repo_org_lmdeploy || 'InternLM/lmdeploy' }}
      - name: Build
        # Echo the build parameters, patch the `-it` flags out of the docker
        # invocation in build_wheel.sh, then run the build.
        run: |
          echo ${PYTHON_VERSION}
          echo ${PLAT_NAME}
          echo ${DOCKER_TAG}
          echo ${OUTPUT_FOLDER}
          echo ${GITHUB_RUN_ID}
          # remove -it
          sed -i 's/docker run --rm -it/docker run --rm/g' builder/manywheel/build_wheel.sh
          bash builder/manywheel/build_wheel.sh ${PYTHON_VERSION} ${PLAT_NAME} ${DOCKER_TAG} ${OUTPUT_FOLDER}
      - name: Upload Artifacts
        uses: actions/upload-artifact@v4
        with:
          name: my-artifact-${{ github.run_id }}-${{ matrix.pyver }}
          path: builder/manywheel/${{ env.OUTPUT_FOLDER }}
          retention-days: 1
          if-no-files-found: error
# Job prepare_env: on each runner listed in the cuda_env input, removes and
# recreates the conda environment <CONDA_ENV>_<cuda_env>, then installs the
# freshly built opencompass wheel plus the CUDA-specific dependency set.
  prepare_env:
    # Run even when build-pypi-lmdeploy was skipped; only a cancelled run stops here.
    if: ${{ !cancelled() }}
    needs: ['build-pypi', 'build-pypi-lmdeploy']
    strategy:
      fail-fast: false
      matrix:
        cuda_env: ${{ fromJSON(inputs.cuda_env || '["dsw_cu12"]') }}
    # The matrix value is used both as the runner label and as the env-name suffix.
    runs-on: ${{ matrix.cuda_env }}
    env:
      CONDA_ENV: opencompass_regression
      PIP_CACHE_PATH: /cpfs01/user/qa-llm-cicd/.cache/pip
    environment: 'prod'
    timeout-minutes: 240  # 4 hours
    steps:
      - name: Clone repository
        uses: actions/checkout@v2
        with:
          repository: ${{ github.event.inputs.repo_org || 'open-compass/opencompass' }}
          ref: ${{ github.event.inputs.repo_ref || 'main' }}
      - name: Download Artifacts
        uses: actions/download-artifact@v4
        with:
          name: my-artifact-${{ github.run_id }}
      - name: Remove Conda Env
        if: always()
        run: |
          . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
          conda env remove -y --name ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
          conda info --envs
      - name: Prepare - create conda env and install torch - cu11
        if: ${{ matrix.cuda_env == 'dsw_cu11' }}
        uses: nick-fields/retry@v3
        id: retry1
        with:
          max_attempts: 3
          timeout_minutes: 40
          command: |
            # Fail on the first broken command: without this only the exit
            # status of the final `pip list` is reported, so a failed install
            # would neither fail the step nor trigger a retry.
            set -e
            . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
            conda create -y --name ${{env.CONDA_ENV}}_${{ matrix.cuda_env }} python=3.10
            conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
            pip install -r /cpfs01/shared/public/qa-llm-cicd/requirements-cu11.txt --cache-dir ${{env.PIP_CACHE_PATH}}
            pip install opencompass*.whl --cache-dir ${{env.PIP_CACHE_PATH}}
            pip install /cpfs01/user/qa-llm-cicd/packages/lmdeploy-0.6.1+cu118-cp310-cp310-manylinux2014_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}}
            pip install /cpfs01/user/qa-llm-cicd/packages/vllm-0.6.1.post1+cu118-cp310-cp310-manylinux1_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}}
            pip uninstall torch torchvision torchaudio -y
            pip install torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1 --cache-dir ${{env.PIP_CACHE_PATH}} --index-url https://download.pytorch.org/whl/cu118
            FLASH_ATTENTION_FORCE_BUILD=TRUE pip install /cpfs01/user/qa-llm-cicd/packages/flash_attn-2.7.0.post2+cu11torch2.5cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
            pip install /cpfs01/user/qa-llm-cicd/packages/xformers-0.0.28.post3-cp310-cp310-manylinux_2_28_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}}
            conda info --envs
            pip list
      - name: Prepare - create conda env and install torch - cu12
        if: ${{ matrix.cuda_env == 'dsw_cu12' }}
        uses: nick-fields/retry@v3
        id: retry2
        with:
          max_attempts: 3
          timeout_minutes: 40
          command: |
            # Fail on the first broken command so a failed install is retried
            # instead of being masked by the trailing `pip list`.
            set -e
            . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
            conda create -y --name ${{env.CONDA_ENV}}_${{ matrix.cuda_env }} python=3.10
            conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
            pip install -r /cpfs01/shared/public/qa-llm-cicd/requirements-cu12.txt --cache-dir ${{env.PIP_CACHE_PATH}}
            pip install opencompass*.whl --cache-dir ${{env.PIP_CACHE_PATH}}
            # Extras are quoted so the shell never treats [..] as a glob pattern.
            pip install "opencompass[lmdeploy]" --cache-dir ${{env.PIP_CACHE_PATH}}
            pip install "opencompass[vllm]" --cache-dir ${{env.PIP_CACHE_PATH}}
            pip install torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1 --cache-dir ${{env.PIP_CACHE_PATH}}
            FLASH_ATTENTION_FORCE_BUILD=TRUE pip install /cpfs01/user/qa-llm-cicd/packages/flash_attn-2.7.0.post2+cu12torch2.5cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
            pip install /cpfs01/user/qa-llm-cicd/packages/xformers-0.0.28.post3-cp310-cp310-manylinux_2_28_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}}
            conda info --envs
            pip list
      # The next two steps only apply when an lmdeploy wheel was built in this run.
      - name: Prepare - download lmdeploy wheel - cu12
        if: ${{ matrix.cuda_env == 'dsw_cu12' && inputs.build_lmdeploy }}
        uses: actions/download-artifact@v4
        with:
          name: my-artifact-${{ github.run_id }}-py310
      - name: Prepare - reinstall lmdeploy - cu12
        if: ${{ matrix.cuda_env == 'dsw_cu12' && inputs.build_lmdeploy }}
        run: |
          . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
          conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
          pip install lmdeploy-*.whl --no-deps
# Job daily_run_test: runs one regression suite per (cuda_env, regression_func)
# matrix cell inside the conda environment created by prepare_env. Each suite
# writes to <REPORT_ROOT>/<run_id>/<suite>_<cuda_env>, symlinks that run's
# summary directory to ./regression_result_daily, and then runs the matching
# pytest marker from .github/scripts/oc_score_assert.py.
  daily_run_test:
    if: ${{ !cancelled() }}
    needs: prepare_env
    strategy:
      fail-fast: false
      matrix:
        cuda_env: ${{ fromJSON(inputs.cuda_env || '["dsw_cu12"]') }}
        regression_func: ${{ fromJSON(github.event.inputs.regression_func || '["chat_models","base_models","chat_obj_fullbench","chat_sub_fullbench","base_fullbench","cmd","api"]') }}
    runs-on: ${{ matrix.cuda_env }}
    env:
      CONDA_ENV: opencompass_regression
      PIP_CACHE_PATH: /cpfs01/user/qa-llm-cicd/.cache/pip
      HF_CACHE_PATH: /cpfs01/shared/public/public_hdd/llmeval/model_weights/hf_hub
      HUGGINGFACE_HUB_CACHE: /cpfs01/shared/public/public_hdd/llmeval/model_weights/hf_hub
      HF_HUB_CACHE: /cpfs01/shared/public/public_hdd/llmeval/model_weights/hf_hub
      COMPASS_DATA_CACHE: /cpfs01/shared/public/llmeval/compass_data_cache
      REPORT_ROOT: /cpfs01/shared/public/qa-llm-cicd/report
    environment: 'prod'
    timeout-minutes: 240  # 4 hours
    steps:
      - name: Clone repository
        uses: actions/checkout@v2
        with:
          repository: ${{ github.event.inputs.repo_org || 'open-compass/opencompass' }}
          ref: ${{ github.event.inputs.repo_ref || 'main' }}
      - name: Prepare - prepare data and hf model
        # Point ~/.cache/huggingface/hub at the shared model-weight cache.
        run: |
          rm -rf ~/.cache/huggingface/hub -f && mkdir ~/.cache -p && mkdir ~/.cache/huggingface -p
          ln -s ${{env.HF_CACHE_PATH}} ~/.cache/huggingface/hub
      - name: Run command testcase
        if: matrix.regression_func == 'cmd'
        # Four CLI invocations (cmd1..cmd4), each followed by its pytest marker case1..case4.
        run: |
          . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
          conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
          conda info --envs
          export from_tf=TRUE
          python tools/list_configs.py internlm2_5 mmlu
          opencompass --models hf_internlm2_5_7b hf_internlm2_1_8b --datasets race_ppl demo_gsm8k_chat_gen --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/cmd1_${{ matrix.cuda_env }} --reuse --max-num-workers 2 --dump-eval-details
          rm regression_result_daily -f && ln -s ${{env.REPORT_ROOT}}/${{ github.run_id }}/cmd1_${{ matrix.cuda_env }}/*/summary regression_result_daily
          python -m pytest -m case1 -s -v --color=yes .github/scripts/oc_score_assert.py
          opencompass --models hf_internlm2_5_7b_chat hf_internlm2_chat_1_8b --datasets race_gen demo_gsm8k_chat_gen -a lmdeploy --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/cmd2_${{ matrix.cuda_env }} --reuse --max-num-workers 2 --dump-eval-details
          rm regression_result_daily -f && ln -s ${{env.REPORT_ROOT}}/${{ github.run_id }}/cmd2_${{ matrix.cuda_env }}/*/summary regression_result_daily
          python -m pytest -m case2 -s -v --color=yes .github/scripts/oc_score_assert.py
          opencompass --datasets race_ppl demo_gsm8k_chat_gen --hf-type base --hf-path internlm/internlm2_5-7b --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/cmd3_${{ matrix.cuda_env }} --reuse --max-num-workers 2 --dump-eval-details
          rm regression_result_daily -f && ln -s ${{env.REPORT_ROOT}}/${{ github.run_id }}/cmd3_${{ matrix.cuda_env }}/*/summary regression_result_daily
          python -m pytest -m case3 -s -v --color=yes .github/scripts/oc_score_assert.py
          opencompass --datasets race_gen demo_gsm8k_chat_gen --hf-type chat --hf-path internlm/internlm2_5-7b-chat --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/cmd4_${{ matrix.cuda_env }} --reuse --max-num-workers 2 --dump-eval-details
          rm regression_result_daily -f && ln -s ${{env.REPORT_ROOT}}/${{ github.run_id }}/cmd4_${{ matrix.cuda_env }}/*/summary regression_result_daily
          python -m pytest -m case4 -s -v --color=yes .github/scripts/oc_score_assert.py
      - name: Run chat model test
        if: matrix.regression_func == 'chat_models'
        run: |
          . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
          conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
          conda info --envs
          opencompass .github/scripts/eval_regression_chat.py --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/chat_${{ matrix.cuda_env }} --reuse --max-num-workers 2 --dump-eval-details
          rm regression_result_daily -f && ln -s ${{env.REPORT_ROOT}}/${{ github.run_id }}/chat_${{ matrix.cuda_env }}/*/summary regression_result_daily
          python -m pytest -m chat -s -v --color=yes .github/scripts/oc_score_assert.py
      - name: Run base model test
        if: matrix.regression_func == 'base_models'
        run: |
          . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
          conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
          conda info --envs
          opencompass .github/scripts/eval_regression_base.py --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/base_${{ matrix.cuda_env }} --reuse --max-num-workers 2 --dump-eval-details
          rm regression_result_daily -f && ln -s ${{env.REPORT_ROOT}}/${{ github.run_id }}/base_${{ matrix.cuda_env }}/*/summary regression_result_daily
          python -m pytest -m base -s -v --color=yes .github/scripts/oc_score_assert.py
      # The objective and subjective fullbench steps carry distinct names so
      # they can be told apart in the run log.
      - name: Run chat model test - objective fullbench
        if: matrix.regression_func == 'chat_obj_fullbench'
        run: |
          . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
          conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
          conda info --envs
          opencompass .github/scripts/eval_regression_chat_objective_fullbench.py --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/chat_obj_${{ matrix.cuda_env }} --reuse --max-num-workers 2 --dump-eval-details
          rm regression_result_daily -f && ln -s ${{env.REPORT_ROOT}}/${{ github.run_id }}/chat_obj_${{ matrix.cuda_env }}/*/summary regression_result_daily
          python -m pytest -m chat_obj_fullbench -s -v --color=yes .github/scripts/oc_score_assert.py
      - name: Run chat model test - subjective fullbench
        if: matrix.regression_func == 'chat_sub_fullbench'
        env:
          # Step-level override of the job-wide COMPASS_DATA_CACHE.
          COMPASS_DATA_CACHE: /cpfs01/shared/public/llmeval/compass_data_cache_subset
        run: |
          . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
          conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
          conda info --envs
          opencompass .github/scripts/eval_regression_chat_subjective_fullbench.py --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/chat_sub_${{ matrix.cuda_env }} --reuse --max-num-workers 2 --dump-eval-details
          rm regression_result_daily -f && ln -s ${{env.REPORT_ROOT}}/${{ github.run_id }}/chat_sub_${{ matrix.cuda_env }}/*/summary regression_result_daily
          python -m pytest -m chat_sub_fullbench -s -v --color=yes .github/scripts/oc_score_assert.py
      - name: Run base model test - fullbench
        if: matrix.regression_func == 'base_fullbench'
        run: |
          . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
          conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
          conda info --envs
          opencompass .github/scripts/eval_regression_base_fullbench.py --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/base_full_${{ matrix.cuda_env }} --reuse --max-num-workers 2 --dump-eval-details
          rm regression_result_daily -f && ln -s ${{env.REPORT_ROOT}}/${{ github.run_id }}/base_full_${{ matrix.cuda_env }}/*/summary regression_result_daily
          python -m pytest -m base_fullbench -s -v --color=yes .github/scripts/oc_score_assert.py
      - name: Run model test - api
        if: matrix.regression_func == 'api'
        run: |
          . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
          conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
          conda info --envs
          # The log redirection below happens before opencompass creates any
          # work-dir, so make sure the per-run report directory exists first.
          mkdir -p ${{env.REPORT_ROOT}}/${{ github.run_id }}
          lmdeploy serve api_server internlm/internlm2_5-7b-chat --max-batch-size 256 --model-name internlm2 > ${{env.REPORT_ROOT}}/${{ github.run_id }}/restful.log 2>&1 &
          # Export the background server's PID so the cleanup step can stop it.
          echo "restful_pid=$!" >> "$GITHUB_ENV"
          # Fixed wait before the evaluation starts sending requests.
          sleep 120s
          opencompass .github/scripts/eval_regression_api.py --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/api_${{ matrix.cuda_env }} --reuse --max-num-workers 2 --dump-eval-details
          rm regression_result_daily -f && ln -s ${{env.REPORT_ROOT}}/${{ github.run_id }}/api_${{ matrix.cuda_env }}/*/summary regression_result_daily
          python -m pytest -m api -s -v --color=yes .github/scripts/oc_score_assert.py
      - name: Run model test - api kill
        if: always() && matrix.regression_func == 'api'
        run: |
          # restful_pid is unset when the previous step failed before launching
          # the server; skip the kill instead of calling it with an empty argument.
          if [ -n "${restful_pid:-}" ]; then
            kill -15 "$restful_pid"
          fi
# Job fullbench_run_test: runs one full-benchmark evaluation per function_type
# on the volc_cu12 runner, inside the existing FULLBENCH_CONDA_ENV conda env.
  fullbench_run_test:
    if: ${{ !cancelled() }}
    needs: ['build-pypi', 'build-pypi-lmdeploy']
    env:
      FULLBENCH_CONDA_ENV: regression_test
      FULLBENCH_REPORT_ROOT: /fs-computility/llm/qa-llm-cicd/eval_report/regression
      COMPASS_DATA_CACHE: /fs-computility/llm/shared/llmeval/datasets/compass_data_cache
    strategy:
      fail-fast: false
      matrix:
        function_type: ${{ fromJSON(github.event.inputs.fullbench_eval || '["base_long_context","base_objective","chat_long_context","chat_objective","chat_subjective"]') }}
    runs-on: volc_cu12
    environment: 'prod'
    timeout-minutes: 360  # 6 hours
    steps:
      - name: Clone repository
        uses: actions/checkout@v2
        with:
          repository: ${{ github.event.inputs.repo_org || 'open-compass/opencompass' }}
          ref: ${{ github.event.inputs.repo_ref || 'main' }}
      - name: Download Artifacts
        uses: actions/download-artifact@v4
        with:
          name: my-artifact-${{ github.run_id }}
      # This job's matrix only defines function_type, so the former
      # `matrix.cuda_env == 'dsw_cu12'` clause was always false and the four
      # steps below could never run. They are now gated on build_lmdeploy alone.
      # NOTE(review): the opencompass reinstall stays gated on build_lmdeploy as
      # in the original condition; confirm whether it should run on every run.
      - name: Prepare - reinstall opencompass - cu12
        if: ${{ inputs.build_lmdeploy }}
        run: |
          . /fs-computility/llm/qa-llm-cicd/miniconda3/bin/activate
          conda activate ${{env.FULLBENCH_CONDA_ENV}}
          pip install opencompass*.whl --no-deps
      - name: Prepare - download lmdeploy wheel - cu12
        if: ${{ inputs.build_lmdeploy }}
        uses: actions/download-artifact@v4
        with:
          name: my-artifact-${{ github.run_id }}-py310
      - name: Prepare - reinstall lmdeploy - cu12
        if: ${{ inputs.build_lmdeploy }}
        run: |
          . /fs-computility/llm/qa-llm-cicd/miniconda3/bin/activate
          conda activate ${{env.FULLBENCH_CONDA_ENV}}
          pip install lmdeploy-*.whl --no-deps
      - name: Conda env
        if: ${{ inputs.build_lmdeploy }}
        run: |
          . /fs-computility/llm/qa-llm-cicd/miniconda3/bin/activate
          conda activate ${{env.FULLBENCH_CONDA_ENV}}
          conda info --envs
          pip list
      - name: Run command testcase
        # Evaluate with the template config for this function_type, link the
        # run's summary directory, then assert scores under the matching marker.
        run: |
          . /fs-computility/llm/qa-llm-cicd/miniconda3/bin/activate
          conda activate ${{env.FULLBENCH_CONDA_ENV}}
          conda info --envs
          export from_tf=TRUE
          opencompass /fs-computility/llm/qa-llm-cicd/ocplayground/template/regression/eval_${{ matrix.function_type }}.py --work-dir ${{env.FULLBENCH_REPORT_ROOT}}/${{ github.run_id }}/${{ matrix.function_type }} --reuse
          rm regression_result_daily -f && ln -s ${{env.FULLBENCH_REPORT_ROOT}}/${{ github.run_id }}/${{ matrix.function_type }}/*/summary regression_result_daily
          python -m pytest -m ${{ matrix.function_type }} -s -v --color=yes .github/scripts/oc_score_assert.py
# Job notify_to_feishu: when either test job reports a failure on the develop
# or main ref, POSTs a JSON message to the webhook held in secrets.WEBHOOK_URL.
  notify_to_feishu:
    needs: [daily_run_test, fullbench_run_test]
    # A status-check function must appear here, otherwise a failed dependency
    # would skip this job; !cancelled() also keeps it quiet on cancelled runs.
    if: ${{ !cancelled() && contains(needs.*.result, 'failure') && (github.ref_name == 'develop' || github.ref_name == 'main') }}
    runs-on: self-hosted
    environment: 'prod'
    timeout-minutes: 5
    steps:
      - name: notify
        run: |
          curl -X POST -H "Content-Type: application/json" -d '{"msg_type":"post","content":{"post":{"zh_cn":{"title":"Opencompass- Daily test failed","content":[[{"tag":"text","text":"branch: ${{github.ref_name}}, run action: ${{github.workflow}} failed. "},{"tag":"a","text":"Please click here for details ","href":"https://github.com/'${{ github.repository }}'/actions/runs/'${GITHUB_RUN_ID}'"},{"tag":"at","user_id":"'${{ secrets.USER_ID }}'"}]]}}}}' ${{ secrets.WEBHOOK_URL }}