Skip to content

Commit

Permalink
enable_sp_async_reduce_scatter for qwen_72b and llama2_70b
Browse files — browse the repository at this point in the history
  • Loading branch information
deepllz committed Aug 8, 2024
1 parent 5c57015 commit 86e8cf0
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
"pipeline_parallel_degree": 8,
"sharding": "stage1",
"sharding_parallel_config": "split_param enable_stage1_overlap",
"tensor_parallel_config": "enable_delay_scale_loss enable_mp_async_allreduce enable_mp_skip_c_identity enable_mp_fused_linear_param_grad_add",
"tensor_parallel_config": "enable_delay_scale_loss enable_mp_async_allreduce enable_sp_async_reduce_scatter enable_mp_skip_c_identity enable_mp_fused_linear_param_grad_add",
"pipeline_parallel_config": "enable_delay_scale_loss enable_release_grads disable_partial_send_recv enable_overlap_p2p_comm",
"virtual_pp_degree": 5,
"sequence_parallel": 1,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ param+="model_item=qwen-qwen-72b_seqlen4096_pretrain "
param+="max_steps=100 "
param+="gradient_accumulation_steps=32 "
param+="pp_recompute_interval=1 "
param+="tensor_parallel_config=enable_delay_scale_loss,enable_mp_async_allreduce,enable_mp_skip_c_identity,enable_mp_fused_linear_param_grad_add, "
param+="tensor_parallel_config=enable_delay_scale_loss,enable_mp_async_allreduce,enable_sp_async_reduce_scatter,enable_mp_skip_c_identity,enable_mp_fused_linear_param_grad_add, "
#多机新添加的参数
param+="pipeline_parallel_config=enable_delay_scale_loss,enable_sharding_comm_overlap,enable_release_grads, "
param+="max_seq_length=4096 "
Expand Down

0 comments on commit 86e8cf0

Please sign in to comment.