[Feature] Support G-Pass@k and LiveMathBench #1412
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: pr_run_test | |
on: | |
pull_request: | |
paths-ignore: | |
- 'README.md' | |
- 'README_zh-CN.md' | |
- 'docs/**' | |
- 'configs/**' | |
- 'tools/**' | |
workflow_dispatch: | |
schedule: | |
- cron: '56 22 * * *' | |
concurrency: | |
group: ${{ github.workflow }}-${{ github.ref }} | |
cancel-in-progress: true | |
env: | |
CONDA_ENV: opencompass_ | |
USERSPACE_PREFIX: /cpfs01/user/qa-llm-cicd | |
HF_CACHE_PATH: /cpfs01/shared/public/public_hdd/llmeval/model_weights/hf_hub | |
HF_DATASETS_OFFLINE: 1 | |
TRANSFORMERS_OFFLINE: 1 | |
HF_HUB_OFFLINE: 1 | |
VLLM_USE_MODELSCOPE: false | |
LMDEPLOY_USE_MODELSCOPE: false | |
jobs: | |
pr_run_test: | |
runs-on: dsw_cu12 | |
environment: 'prod' | |
timeout-minutes: 30 | |
steps: | |
- name: Checkout repository | |
uses: actions/checkout@v2 | |
- name: Prepare - Install opencompass | |
run: | | |
. /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate | |
conda activate ${{env.CONDA_ENV}}${{ runner.name }} | |
python3 -m pip uninstall opencompass -y | |
python3 -m pip install -e . --cache-dir ${{env.USERSPACE_PREFIX}}/.cache/pip | |
conda info --envs | |
- name: Prepare - prepare data and hf model | |
run: | | |
cp -r ${{env.USERSPACE_PREFIX}}/data . | |
rm -rf ~/.cache/huggingface/hub -f && mkdir ~/.cache -p && mkdir ~/.cache/huggingface -p | |
ln -s ${{env.HF_CACHE_PATH}} ~/.cache/huggingface/hub | |
- name: Run test | |
run: | | |
. /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate | |
conda activate ${{env.CONDA_ENV}}${{ runner.name }} | |
conda info --envs | |
rm -rf regression_result | |
opencompass --models hf_internlm2_5_20b_chat --datasets demo_gsm8k_chat_gen --work-dir regression_result1 --debug | |
opencompass --models hf_internlm2_5_7b_chat --datasets demo_gsm8k_chat_gen --work-dir regression_result2 --debug --max-num-workers 2 | |
opencompass --models hf_internlm2_5_7b_chat --datasets demo_gsm8k_chat_gen -a lmdeploy --work-dir regression_result3 --debug --max-num-workers 2 | |
- name: Get result | |
run: | | |
score=$(sed -n '$p' regression_result1/*/summary/*.csv | awk -F ',' '{print $NF}') | |
if (( ${score%.*} >= 88 && ${score%.*} <= 89 )); then | |
echo "score is $score between 88 and 89" | |
else | |
echo "score is $score not between 88 and 89" | |
exit 1 | |
fi | |
score=$(sed -n '$p' regression_result2/*/summary/*.csv | awk -F ',' '{print $NF}') | |
if (( ${score%.*} >= 87 && ${score%.*} <= 88 )); then | |
echo "score is $score between 87 and 88" | |
else | |
echo "score is $score not between 87 and 88" | |
exit 1 | |
fi | |
score=$(sed -n '$p' regression_result3/*/summary/*.csv | awk -F ',' '{print $NF}') | |
if (( ${score%.*} >= 87 && ${score%.*} <= 89 )); then | |
echo "score is $score between 87 and 89" | |
else | |
echo "score is $score not between 87 and 89" | |
exit 1 | |
fi | |
rm -rf regression_result1 & rm -rf regression_result2 & rm -rf regression_result3 | |
- name: Uninstall opencompass | |
if: always() | |
run: | | |
. /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate | |
conda activate ${{env.CONDA_ENV}}${{ runner.name }} | |
python3 -m pip uninstall opencompass -y | |
conda info --envs | |
notify_to_feishu: | |
if: ${{ always() && !cancelled() && contains(needs.*.result, 'failure') && (github.ref_name == 'develop' || github.ref_name == 'main') }} | |
needs: [pr_run_test] | |
environment: 'prod' | |
timeout-minutes: 5 | |
runs-on: self-hosted | |
steps: | |
- name: notify | |
run: | | |
curl -X POST -H "Content-Type: application/json" -d '{"msg_type":"post","content":{"post":{"zh_cn":{"title":"Opencompass- pr test failed","content":[[{"tag":"text","text":"branch: ${{github.ref_name}}, run action: ${{github.workflow}} failed. "},{"tag":"a","text":"Please click here for details ","href":"https://github.com/'${{ github.repository }}'/actions/runs/'${GITHUB_RUN_ID}'"},{"tag":"at","user_id":"'${{ secrets.USER_ID }}'"}]]}}}}' ${{ secrets.WEBHOOK_URL }} |