Skip to content

Commit

Permalink
update N4C32 config
Browse files (browse the repository at this point in the history)
  • Loading branch information
Liujie0926 committed Dec 23, 2024
1 parent e6a2f30 commit 15b9336
Show file tree
Hide file tree
Showing 9 changed files with 21 additions and 20 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
 param="model_name_or_path=meta-llama/Llama-2-70b "
 param+="per_device_train_batch_size=1 "
 param+="tensor_parallel_degree=8 "
-param+="pipeline_parallel_degree=1 "
-param+="sharding_parallel_degree=4 "
+param+="pipeline_parallel_degree=4 "
+param+="sharding_parallel_degree=1 "
 param+="gradient_accumulation_steps=32 "
 param+="run_stage=dpo "
 param+="run_mode=tp8_pp1_sd4_acc32_dygraph "
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@

 param="model_name_or_path=meta-llama/Llama-2-70b "
 param+="per_device_train_batch_size=1 "
-param+="tensor_parallel_degree=4 "
-param+="pipeline_parallel_degree=1 "
-param+="gradient_accumulation_steps=4 "
+param+="tensor_parallel_degree=8 "
+param+="pipeline_parallel_degree=4 "
+param+="gradient_accumulation_steps=32 "
 param+="run_stage=lora "
 param+="run_mode=tp4_pp1_acc4_dygraph "
 param+="device_num=N4C32 "
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,13 @@
 "disable_tqdm": true,
 "load_best_model_at_end": true,
 "tensor_parallel_degree": 8,
-"sharding_parallel_degree": 4,
-"pipeline_parallel_degree": 1,
+"sharding_parallel_degree": 1,
+"pipeline_parallel_degree": 4,
 "sharding": "stage2",
 "use_flash_attention": true,
 "flash_mask": true,
 "recompute": true,
-"recompute_granularity": "full_attn",
+"recompute_granularity": "full",
 "benchmark": true,
 "unified_checkpoint": true,
 "autotuner_benchmark":false,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
 "dataset_name_or_path": "./data/sft_benchmark_train/",
 "output_dir": "./checkpoints/lora_ckpts",
 "per_device_train_batch_size": 1,
-"gradient_accumulation_steps": 4,
+"gradient_accumulation_steps": 32,
 "per_device_eval_batch_size": 8,
 "eval_accumulation_steps":16,
 "num_train_epochs": 1,
Expand All @@ -30,11 +30,12 @@
 "load_best_model_at_end": true,
 "eval_with_do_generation": false,
 "metric_for_best_model": "accuracy",
-"recompute": false,
+"recompute": true,
 "recompute_granularity": "full",
 "save_total_limit": 1,
-"tensor_parallel_degree": 4,
-"pipeline_parallel_degree": 1,
+"tensor_parallel_output": true,
+"tensor_parallel_degree": 8,
+"pipeline_parallel_degree": 4,
 "lora": true,
 "unified_checkpoint": true,
 "benchmark": true,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
 "load_best_model_at_end": true,
 "eval_with_do_generation": false,
 "metric_for_best_model": "accuracy",
-"recompute": false,
+"recompute": true,
 "recompute_granularity": "full",
 "save_total_limit": 1,
 "benchmark": true,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
 param="model_name_or_path=Qwen/Qwen2.5-72B "
 param+="per_device_train_batch_size=1 "
 param+="tensor_parallel_degree=8 "
-param+="sharding_parallel_degree=4 "
-param+="gradient_accumulation_steps=4 "
+param+="pipeline_parallel_degree=4 "
+param+="gradient_accumulation_steps=32 "
 param+="run_stage=dpo "
 param+="run_mode=tp8_sd4_acc4_dygraph "
 param+="device_num=N4C32 "
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
 "dev_dataset_path": "./data/dpo_benchmark_train/dev.json",
 "output_dir": "./checkpoints/dpo_ckpts",
 "per_device_train_batch_size": 1,
-"gradient_accumulation_steps": 4,
+"gradient_accumulation_steps": 32,
 "per_device_eval_batch_size": 1,
 "num_train_epochs": 1,
 "learning_rate": 1e-06,
Expand All @@ -22,11 +22,11 @@
 "load_best_model_at_end": true,
 "tensor_parallel_output": true,
 "tensor_parallel_degree": 8,
-"sharding_parallel_degree": 4,
+"pipeline_parallel_degree": 4,
 "sharding": "stage2",
 "use_flash_attention": true,
 "flash_mask": true,
-"recompute": false,
+"recompute": true,
 "recompute_granularity": "full",
 "benchmark": true,
 "unified_checkpoint": true,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
 "load_best_model_at_end": true,
 "eval_with_do_generation": false,
 "metric_for_best_model": "accuracy",
-"recompute": false,
+"recompute": true,
 "recompute_granularity": "full",
 "save_total_limit": 1,
 "tensor_parallel_degree": 8,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
 "load_best_model_at_end": true,
 "eval_with_do_generation": false,
 "metric_for_best_model": "accuracy",
-"recompute": false,
+"recompute": true,
 "recompute_granularity": "full",
 "save_total_limit": 1,
 "benchmark": true,
Expand Down

0 comments on commit 15b9336

Please sign in to comment.