From 5d05434f4707ff49cc92ea8e0f5adb3e1cdd7c55 Mon Sep 17 00:00:00 2001 From: zhengya01 Date: Wed, 21 Sep 2022 11:01:15 +0800 Subject: [PATCH 1/3] update tipc --- tests/test_tipc/common_func.sh | 7 +++-- .../test_tipc/test_train_inference_python.sh | 30 +++++++++++-------- 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/tests/test_tipc/common_func.sh b/tests/test_tipc/common_func.sh index 3f0fa66b77ff..b12d3dd9c9db 100644 --- a/tests/test_tipc/common_func.sh +++ b/tests/test_tipc/common_func.sh @@ -56,10 +56,11 @@ function status_check(){ last_status=$1 # the exit code run_command=$2 run_log=$3 + model_name=$4 + log_path=$5 if [ $last_status -eq 0 ]; then - echo -e "\033[33m Run successfully with command - ${run_command}! \033[0m" | tee -a ${run_log} + echo -e "\033[33m Run successfully with command - ${model_name} - ${run_command} - ${log_path} \033[0m" | tee -a ${run_log} else - echo -e "\033[33m Run failed with command - ${run_command}! \033[0m" | tee -a ${run_log} + echo -e "\033[33m Run failed with command - ${model_name} - ${run_command} - ${log_path} \033[0m" | tee -a ${run_log} fi } - diff --git a/tests/test_tipc/test_train_inference_python.sh b/tests/test_tipc/test_train_inference_python.sh index 7f3cde68475e..43ea2b4cac68 100644 --- a/tests/test_tipc/test_train_inference_python.sh +++ b/tests/test_tipc/test_train_inference_python.sh @@ -140,7 +140,8 @@ if [ ${MODE} = "klquant_whole_infer" ]; then infer_value1=$(func_parser_value "${lines[19]}") fi -LOG_PATH="./test_tipc/output/${model_name}" +WORK_PATH=$(pwd) +LOG_PATH="$(pwd)/test_tipc/output/${model_name}/${MODE}" mkdir -p ${LOG_PATH} status_log="${LOG_PATH}/results_python.log" @@ -153,6 +154,7 @@ function func_inference(){ _log_path=$4 _img_dir=$5 _flag_quant=$6 + _gpu=$7 # inference for use_gpu in ${use_gpu_list[*]}; do if [ ${use_gpu} = "False" ] || [ ${use_gpu} = "cpu" ]; then @@ -171,7 +173,7 @@ function func_inference(){ fi # skip when quant model inference but precision is not int8 set_precision=$(func_set_params "${precision_key}" "${precision}") - _save_log_path="${_log_path}/python_infer_cpu_usemkldnn_${use_mkldnn}_threads_${threads}_precision_${precision}_batchsize_${batch_size}.log" + _save_log_path="${_log_path}/python_infer_cpu_gpus_${_gpu}_usemkldnn_${use_mkldnn}_threads_${threads}_precision_${precision}_batchsize_${batch_size}.log" set_infer_data=$(func_set_params "${image_dir_key}" "${_img_dir}") set_benchmark=$(func_set_params "${benchmark_key}" "${benchmark_value}") set_batchsize=$(func_set_params "${batch_size_key}" "${batch_size}") @@ -184,7 +186,7 @@ function func_inference(){ eval $command last_status=${PIPESTATUS[0]} eval "cat ${_save_log_path}" - status_check $last_status "${command}" "${status_log}" + status_check $last_status "${command}" "${status_log}" "${model_name}" "${_save_log_path}" done done done @@ -202,7 +204,7 @@ function func_inference(){ continue fi for batch_size in ${batch_size_list[*]}; do - _save_log_path="${_log_path}/python_infer_gpu_usetrt_${use_trt}_precision_${precision}_batchsize_${batch_size}.log" + _save_log_path="${_log_path}/python_infer_gpu_gpus_${_gpu}_usetrt_${use_trt}_precision_${precision}_batchsize_${batch_size}.log" set_infer_data=$(func_set_params "${image_dir_key}" "${_img_dir}") set_benchmark=$(func_set_params "${benchmark_key}" "${benchmark_value}") set_batchsize=$(func_set_params "${batch_size_key}" "${batch_size}") @@ -215,7 +217,7 @@ function func_inference(){ eval $command last_status=${PIPESTATUS[0]} eval "cat ${_save_log_path}" - status_check $last_status "${command}" "${status_log}" + status_check $last_status "${command}" "${status_log}" "${model_name}" "${_save_log_path}" done done @@ -335,8 +337,8 @@ else set_train_params1=$(func_set_params "${train_param_key1}" "${train_param_value1}") set_use_gpu=$(func_set_params "${train_use_gpu_key}" "${train_use_gpu}") if [ ${#ips} -le 26 ];then - save_log="${LOG_PATH}/${trainer}_gpus_${gpu}_autocast_${autocast}" nodes=1 + save_log="${LOG_PATH}/${trainer}_gpus_${gpu}_autocast_${autocast}_nodes_${nodes}" else IFS="," ips_array=(${ips}) @@ -355,18 +357,21 @@ else cmd="${python} -m paddle.distributed.launch --ips=${ips} --gpus=${gpu} ${run_train} ${set_use_gpu} ${set_save_model} ${set_pretrain} ${set_epoch} ${set_autocast} ${set_batchsize} ${set_train_params1} ${set_amp_config}" fi # run train + _train_log="${LOG_PATH}/${trainer}_gpus_${gpu}_autocast_${autocast}_nodes_${nodes}.log" eval $cmd - status_check $? "${cmd}" "${status_log}" + cat ${WORK_PATH}/log/workerlog.0 > ${_train_log} + status_check $? "${cmd}" "${status_log}" "${model_name}" "${_train_log}" set_eval_pretrain=$(func_set_params "${pretrain_model_key}" "${save_log}/${train_model_name}") # run eval if [ ${eval_py} != "null" ]; then eval ${env} + _eval_log="${LOG_PATH}/${trainer}_gpus_${gpu}_autocast_${autocast}_nodes_${nodes}_eval.log" set_eval_params1=$(func_set_params "${eval_key1}" "${eval_value1}") - eval_cmd="${python} ${eval_py} ${set_eval_pretrain} ${set_use_gpu} ${set_eval_params1}" + eval_cmd="${python} ${eval_py} ${set_eval_pretrain} ${set_use_gpu} ${set_eval_params1} >${_eval_log} 2>&1" eval $eval_cmd - status_check $? "${eval_cmd}" "${status_log}" + status_check $? "${eval_cmd}" "${status_log}" "${model_name}" "${_eval_log}" fi # run export model if [ ${run_export} != "null" ]; then @@ -374,9 +379,10 @@ else save_infer_path="${save_log}" set_export_weight=$(func_set_params "${export_weight}" "${save_log}/${train_model_name}") set_save_infer_key=$(func_set_params "${save_infer_key}" "${save_infer_path}") - export_cmd="${python} ${run_export} ${set_export_weight} ${set_save_infer_key}" + _export_log="${LOG_PATH}/${trainer}_gpus_${gpu}_autocast_${autocast}_nodes_${nodes}_export.log" + export_cmd="${python} ${run_export} ${set_export_weight} ${set_save_infer_key} >${_export_log} 2>&1" eval $export_cmd - status_check $? "${export_cmd}" "${status_log}" + status_check $? "${export_cmd}" "${status_log}" "${model_name}" "${_export_log}" #run inference eval $env @@ -386,7 +392,7 @@ else else infer_model_dir=${save_infer_path} fi - func_inference "${python}" "${inference_py}" "${infer_model_dir}" "${LOG_PATH}" "${train_infer_img_dir}" "${flag_quant}" + func_inference "${python}" "${inference_py}" "${infer_model_dir}" "${LOG_PATH}" "${train_infer_img_dir}" "${flag_quant}" "${gpu}" eval "unset CUDA_VISIBLE_DEVICES" fi From ffcee1b0d464a833a934ed3306b4ba5f0b628338 Mon Sep 17 00:00:00 2001 From: zhengya01 Date: Wed, 21 Sep 2022 11:32:00 +0800 Subject: [PATCH 2/3] update tipc --- tests/test_tipc/test_train_inference_python.sh | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/test_tipc/test_train_inference_python.sh b/tests/test_tipc/test_train_inference_python.sh index 43ea2b4cac68..dd9d446f2dd7 100644 --- a/tests/test_tipc/test_train_inference_python.sh +++ b/tests/test_tipc/test_train_inference_python.sh @@ -348,18 +348,20 @@ else fi + _train_log="${LOG_PATH}/${trainer}_gpus_${gpu}_autocast_${autocast}_nodes_${nodes}.log" set_save_model=$(func_set_params "${save_model_key}" "${save_log}") if [ ${#gpu} -le 2 ];then # train with cpu or single gpu - cmd="${python} ${run_train} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_autocast} ${set_batchsize} ${set_train_params1} ${set_amp_config} " + cmd="${python} ${run_train} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_autocast} ${set_batchsize} ${set_train_params1} ${set_amp_config} >${_train_log} 2>&1" elif [ ${#ips} -le 26 ];then # train with multi-gpu cmd="${python} -m paddle.distributed.launch --gpus=${gpu} ${run_train} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_autocast} ${set_batchsize} ${set_train_params1} ${set_amp_config}" else # train with multi-machine cmd="${python} -m paddle.distributed.launch --ips=${ips} --gpus=${gpu} ${run_train} ${set_use_gpu} ${set_save_model} ${set_pretrain} ${set_epoch} ${set_autocast} ${set_batchsize} ${set_train_params1} ${set_amp_config}" fi # run train - _train_log="${LOG_PATH}/${trainer}_gpus_${gpu}_autocast_${autocast}_nodes_${nodes}.log" eval $cmd - cat ${WORK_PATH}/log/workerlog.0 > ${_train_log} + if [ ${#gpu} -ge 2 ];then + cat ${WORK_PATH}/log/workerlog.0 > ${_train_log} + fi status_check $? "${cmd}" "${status_log}" "${model_name}" "${_train_log}" set_eval_pretrain=$(func_set_params "${pretrain_model_key}" "${save_log}/${train_model_name}") From b59fca0ce21aa62bf09f40b25ea0f2f4d3ccf36f Mon Sep 17 00:00:00 2001 From: zhengya01 Date: Wed, 21 Sep 2022 12:54:55 +0800 Subject: [PATCH 3/3] code style --- tests/test_tipc/common_func.sh | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/tests/test_tipc/common_func.sh b/tests/test_tipc/common_func.sh index b12d3dd9c9db..7c65f275f601 100644 --- a/tests/test_tipc/common_func.sh +++ b/tests/test_tipc/common_func.sh @@ -1,5 +1,19 @@ #!/bin/bash +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + function func_parser_key(){ strs=$1 IFS=":" @@ -53,7 +67,7 @@ function func_parser_params(){ } function status_check(){ - last_status=$1 # the exit code + last_status=$1 # the exit code. run_command=$2 run_log=$3 model_name=$4