Skip to content

Commit

Permalink
[LLM Benchmark] Update scripts (#9722)
Browse files Browse the repository at this point in the history
* add no_proxy & disable the paddlenlp_ops install step

* update timeout for dpo

* fix sequence_parallel

* add timeout

* add Total_Tokens_per_second_per_gpu

* fix Tokens_per_second_per_gpu

* update Total_Tokens_per_second_per_gpu
  • Loading branch information
Liujie0926 authored Jan 21, 2025
1 parent 730a762 commit 7c1c9ba
Show file tree
Hide file tree
Showing 7 changed files with 30 additions and 16 deletions.
Empty file added fix_time
Empty file.
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
"max_seq_len": 4096,
"max_prompt_len": 2048,
"pipeline_parallel_config": "disable_partial_send_recv enable_clear_every_step_cache",
"sequence_parallel": 1,
"sequence_parallel": 0,
"bf16": true,
"fp16_opt_level": "O2",
"do_train": true,
Expand Down
6 changes: 3 additions & 3 deletions tests/test_tipc/llm/llama2/benchmark_common/prepare.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ python setup.py install
cd -

# install paddlenlp_ops
cd ../csrc/
python setup_cuda.py install
cd -
# cd ../csrc/
# python setup_cuda.py install
# cd -

cd ../llm
cp -r ../tests/test_tipc/llm/llama2/benchmark_common/benchmark_json ./
Expand Down
15 changes: 11 additions & 4 deletions tests/test_tipc/llm/llama2/benchmark_common/run_benchmark.sh
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ function _set_params(){
skip_steps=0 # (必选)解析日志,跳过模型前几个性能不稳定的step
keyword="Effective_Tokens_per_second_per_gpu:" # (必选)解析日志,筛选出性能数据所在行的关键字
is_large_model=True # (可选)普通模型默认为False,如果添加大模型且只取一条ips设置为True
convergence_key="loss:" # (可选)解析日志,筛选出收敛数据所在行的关键字 如:convergence_key="loss:"
convergence_key="Total_Tokens_per_second_per_gpu:" # (可选)解析日志,筛选出收敛数据所在行的关键字 如:convergence_key="loss:"

fp_item="bf16"
# 以下为通用执行命令,无特殊可不用修改
Expand Down Expand Up @@ -105,18 +105,25 @@ function _train(){
;;
esac
cd ../llm/
export no_proxy=bcebos.com
echo "train_cmd: ${train_cmd} log_file: ${log_file}"
python -c "import paddlenlp"
if [[ ${model_name_or_path} =~ "CE" ]];then # CE精度-不限制执行时间
${train_cmd} > ${log_file} 2>&1
else
timeout 30m ${train_cmd} > ${log_file} 2>&1
timeout 60m ${train_cmd} > ${log_file} 2>&1
# echo ${train_cmd}
Effective_Tokens_per_second=`cat ${log_file} | grep -E 'Effective_Tokens_per_second|Effective tokens per second:' \
|awk -F': ' '{print $2}' |awk -F' ' '{print $1}'`
num_gpu=$(echo "$device_num" | sed 's/^.*C//')
ips=$(awk -v a="$Effective_Tokens_per_second" -v b="$num_gpu" 'BEGIN {printf "%.2f\n", a / b}')
echo "Effective_Tokens_per_second_per_gpu: ${ips}" >> ${log_file}
Effective_Tokens_per_second_per_gpu=$(awk -v a="$Effective_Tokens_per_second" -v b="$num_gpu" 'BEGIN {printf "%.2f\n", a / b}')
echo "Effective_Tokens_per_second_per_gpu: ${Effective_Tokens_per_second_per_gpu}" >> ${log_file}
Train_samples_per_second=`cat ${log_file} | grep 'train_samples_per_second' \
|awk -F'train_samples_per_second: ' '{print $2}' |awk -F', ' '{print $1}'`
length=4096
Total_Tokens_per_second=$(awk -v a="$Train_samples_per_second" -v b="$length" 'BEGIN {printf "%.2f\n", a * b}')
Total_Tokens_per_second_per_gpu=$(awk -v a="$Total_Tokens_per_second" -v b="$num_gpu" 'BEGIN {printf "%.2f\n", a / b}')
echo "Total_Tokens_per_second_per_gpu: ${Total_Tokens_per_second_per_gpu}" >> ${log_file}
fi
if [ $? -ne 0 ];then
echo -e "${model_name}, FAIL"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
"max_seq_len": 4096,
"max_prompt_len": 2048,
"pipeline_parallel_config": "disable_partial_send_recv enable_clear_every_step_cache",
"sequence_parallel": 1,
"sequence_parallel": 0,
"bf16": true,
"fp16_opt_level": "O2",
"do_train": true,
Expand Down
6 changes: 3 additions & 3 deletions tests/test_tipc/llm/qwen2_5/benchmark_common/prepare.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ python setup.py install
cd -

# install paddlenlp_ops
cd ../csrc/
python setup_cuda.py install
cd -
# cd ../csrc/
# python setup_cuda.py install
# cd -

cd ../llm
cp -r ../tests/test_tipc/llm/qwen2_5/benchmark_common/benchmark_json ./
Expand Down
15 changes: 11 additions & 4 deletions tests/test_tipc/llm/qwen2_5/benchmark_common/run_benchmark.sh
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ function _set_params(){
skip_steps=0 # (必选)解析日志,跳过模型前几个性能不稳定的step
keyword="Effective_Tokens_per_second_per_gpu:" # (必选)解析日志,筛选出性能数据所在行的关键字
is_large_model=True # (可选)普通模型默认为False,如果添加大模型且只取一条ips设置为True
convergence_key="loss:" # (可选)解析日志,筛选出收敛数据所在行的关键字 如:convergence_key="loss:"
convergence_key="Total_Tokens_per_second_per_gpu:" # (可选)解析日志,筛选出收敛数据所在行的关键字 如:convergence_key="loss:"

fp_item="bf16"
# 以下为通用执行命令,无特殊可不用修改
Expand Down Expand Up @@ -105,18 +105,25 @@ function _train(){
;;
esac
cd ../llm/
export no_proxy=bcebos.com
echo "train_cmd: ${train_cmd} log_file: ${log_file}"
python -c "import paddlenlp"
if [[ ${model_name_or_path} =~ "CE" ]];then # CE精度-不限制执行时间
${train_cmd} > ${log_file} 2>&1
else
timeout 30m ${train_cmd} > ${log_file} 2>&1
timeout 60m ${train_cmd} > ${log_file} 2>&1
# echo ${train_cmd}
Effective_Tokens_per_second=`cat ${log_file} | grep -E 'Effective_Tokens_per_second|Effective tokens per second:' \
|awk -F': ' '{print $2}' |awk -F' ' '{print $1}'`
num_gpu=$(echo "$device_num" | sed 's/^.*C//')
ips=$(awk -v a="$Effective_Tokens_per_second" -v b="$num_gpu" 'BEGIN {printf "%.2f\n", a / b}')
echo "Effective_Tokens_per_second_per_gpu: ${ips}" >> ${log_file}
Effective_Tokens_per_second_per_gpu=$(awk -v a="$Effective_Tokens_per_second" -v b="$num_gpu" 'BEGIN {printf "%.2f\n", a / b}')
echo "Effective_Tokens_per_second_per_gpu: ${Effective_Tokens_per_second_per_gpu}" >> ${log_file}
Train_samples_per_second=`cat ${log_file} | grep 'train_samples_per_second' \
|awk -F'train_samples_per_second: ' '{print $2}' |awk -F', ' '{print $1}'`
length=4096
Total_Tokens_per_second=$(awk -v a="$Train_samples_per_second" -v b="$length" 'BEGIN {printf "%.2f\n", a * b}')
Total_Tokens_per_second_per_gpu=$(awk -v a="$Total_Tokens_per_second" -v b="$num_gpu" 'BEGIN {printf "%.2f\n", a / b}')
echo "Total_Tokens_per_second_per_gpu: ${Total_Tokens_per_second_per_gpu}" >> ${log_file}
fi
if [ $? -ne 0 ];then
echo -e "${model_name}, FAIL"
Expand Down

0 comments on commit 7c1c9ba

Please sign in to comment.