diff --git a/tests/test_tipc/llm/llama2/N4C32/llama2-70b_dpo_bs16_bf16_tp8_pp1_sd4_acc32_dygraph.sh b/tests/test_tipc/llm/llama2/N4C32/llama2-70b_dpo_bs16_bf16_tp8_pp4_sd1_acc32_dygraph.sh
similarity index 93%
rename from tests/test_tipc/llm/llama2/N4C32/llama2-70b_dpo_bs16_bf16_tp8_pp1_sd4_acc32_dygraph.sh
rename to tests/test_tipc/llm/llama2/N4C32/llama2-70b_dpo_bs16_bf16_tp8_pp4_sd1_acc32_dygraph.sh
index 6f1e9fc8e7f1..38cda1cbeb56 100644
--- a/tests/test_tipc/llm/llama2/N4C32/llama2-70b_dpo_bs16_bf16_tp8_pp1_sd4_acc32_dygraph.sh
+++ b/tests/test_tipc/llm/llama2/N4C32/llama2-70b_dpo_bs16_bf16_tp8_pp4_sd1_acc32_dygraph.sh
@@ -16,8 +16,8 @@
 param="model_name_or_path=meta-llama/Llama-2-70b "
 param+="per_device_train_batch_size=1 "
 param+="tensor_parallel_degree=8 "
-param+="pipeline_parallel_degree=1 "
-param+="sharding_parallel_degree=4 "
+param+="pipeline_parallel_degree=4 "
+param+="sharding_parallel_degree=1 "
 param+="gradient_accumulation_steps=32 "
 param+="run_stage=dpo "
 param+="run_mode=tp8_pp1_sd4_acc32_dygraph "
diff --git a/tests/test_tipc/llm/llama2/N4C32/llama2-70b_lora_bs16_bf16_tp4_pp1_acc4_dygraph.sh b/tests/test_tipc/llm/llama2/N4C32/llama2-70b_lora_bs16_bf16_tp8_pp4_acc32_dygraph.sh
similarity index 89%
rename from tests/test_tipc/llm/llama2/N4C32/llama2-70b_lora_bs16_bf16_tp4_pp1_acc4_dygraph.sh
rename to tests/test_tipc/llm/llama2/N4C32/llama2-70b_lora_bs16_bf16_tp8_pp4_acc32_dygraph.sh
index 2f5235b39c2c..dde5d4d5a61c 100644
--- a/tests/test_tipc/llm/llama2/N4C32/llama2-70b_lora_bs16_bf16_tp4_pp1_acc4_dygraph.sh
+++ b/tests/test_tipc/llm/llama2/N4C32/llama2-70b_lora_bs16_bf16_tp8_pp4_acc32_dygraph.sh
@@ -15,9 +15,9 @@
 param="model_name_or_path=meta-llama/Llama-2-70b "
 param+="per_device_train_batch_size=1 "
-param+="tensor_parallel_degree=4 "
-param+="pipeline_parallel_degree=1 "
-param+="gradient_accumulation_steps=4 "
+param+="tensor_parallel_degree=8 "
+param+="pipeline_parallel_degree=4 "
+param+="gradient_accumulation_steps=32 "
 param+="run_stage=lora "
 param+="run_mode=tp4_pp1_acc4_dygraph "
 param+="device_num=N4C32 "
diff --git a/tests/test_tipc/llm/llama2/benchmark_common/benchmark_json/llama2-70b/dpo.json b/tests/test_tipc/llm/llama2/benchmark_common/benchmark_json/llama2-70b/dpo.json
index 4ca9c223d2de..c90f058e1f0b 100644
--- a/tests/test_tipc/llm/llama2/benchmark_common/benchmark_json/llama2-70b/dpo.json
+++ b/tests/test_tipc/llm/llama2/benchmark_common/benchmark_json/llama2-70b/dpo.json
@@ -21,13 +21,13 @@
     "disable_tqdm": true,
     "load_best_model_at_end": true,
     "tensor_parallel_degree": 8,
-    "sharding_parallel_degree": 4,
-    "pipeline_parallel_degree": 1,
+    "sharding_parallel_degree": 1,
+    "pipeline_parallel_degree": 4,
     "sharding": "stage2",
     "use_flash_attention": true,
     "flash_mask": true,
     "recompute": true,
-    "recompute_granularity": "full_attn",
+    "recompute_granularity": "full",
     "benchmark": true,
     "unified_checkpoint": true,
     "autotuner_benchmark":false,
diff --git a/tests/test_tipc/llm/llama2/benchmark_common/benchmark_json/llama2-70b/lora.json b/tests/test_tipc/llm/llama2/benchmark_common/benchmark_json/llama2-70b/lora.json
index f3efc69b6cb0..f2101b1dc48d 100644
--- a/tests/test_tipc/llm/llama2/benchmark_common/benchmark_json/llama2-70b/lora.json
+++ b/tests/test_tipc/llm/llama2/benchmark_common/benchmark_json/llama2-70b/lora.json
@@ -3,7 +3,7 @@
     "dataset_name_or_path": "./data/sft_benchmark_train/",
     "output_dir": "./checkpoints/lora_ckpts",
     "per_device_train_batch_size": 1,
-    "gradient_accumulation_steps": 4,
+    "gradient_accumulation_steps": 32,
     "per_device_eval_batch_size": 8,
     "eval_accumulation_steps":16,
     "num_train_epochs": 1,
@@ -30,11 +30,12 @@
     "load_best_model_at_end": true,
     "eval_with_do_generation": false,
     "metric_for_best_model": "accuracy",
-    "recompute": false,
+    "recompute": true,
     "recompute_granularity": "full",
     "save_total_limit": 1,
-    "tensor_parallel_degree": 4,
-    "pipeline_parallel_degree": 1,
+    "tensor_parallel_output": true,
+    "tensor_parallel_degree": 8,
+    "pipeline_parallel_degree": 4,
     "lora": true,
     "unified_checkpoint": true,
     "benchmark": true,
diff --git a/tests/test_tipc/llm/llama2/benchmark_common/benchmark_json/llama2-70b/sft.json b/tests/test_tipc/llm/llama2/benchmark_common/benchmark_json/llama2-70b/sft.json
index f3cddd5cbc81..186961130ae8 100644
--- a/tests/test_tipc/llm/llama2/benchmark_common/benchmark_json/llama2-70b/sft.json
+++ b/tests/test_tipc/llm/llama2/benchmark_common/benchmark_json/llama2-70b/sft.json
@@ -30,7 +30,7 @@
     "load_best_model_at_end": true,
     "eval_with_do_generation": false,
     "metric_for_best_model": "accuracy",
-    "recompute": false,
+    "recompute": true,
     "recompute_granularity": "full",
     "save_total_limit": 1,
     "benchmark": true,
diff --git a/tests/test_tipc/llm/qwen2_5/N4C32/qwen-qwen2_5-72b_dpo_bs16_bf16_tp8_sd4_acc4_dygraph.sh b/tests/test_tipc/llm/qwen2_5/N4C32/qwen-qwen2_5-72b_dpo_bs16_bf16_tp8_pp4_acc32_dygraph.sh
similarity index 93%
rename from tests/test_tipc/llm/qwen2_5/N4C32/qwen-qwen2_5-72b_dpo_bs16_bf16_tp8_sd4_acc4_dygraph.sh
rename to tests/test_tipc/llm/qwen2_5/N4C32/qwen-qwen2_5-72b_dpo_bs16_bf16_tp8_pp4_acc32_dygraph.sh
index e7940e7d9999..5c7b9d0c68eb 100644
--- a/tests/test_tipc/llm/qwen2_5/N4C32/qwen-qwen2_5-72b_dpo_bs16_bf16_tp8_sd4_acc4_dygraph.sh
+++ b/tests/test_tipc/llm/qwen2_5/N4C32/qwen-qwen2_5-72b_dpo_bs16_bf16_tp8_pp4_acc32_dygraph.sh
@@ -16,8 +16,8 @@
 param="model_name_or_path=Qwen/Qwen2.5-72B "
 param+="per_device_train_batch_size=1 "
 param+="tensor_parallel_degree=8 "
-param+="sharding_parallel_degree=4 "
-param+="gradient_accumulation_steps=4 "
+param+="pipeline_parallel_degree=4 "
+param+="gradient_accumulation_steps=32 "
 param+="run_stage=dpo "
 param+="run_mode=tp8_sd4_acc4_dygraph "
 param+="device_num=N4C32 "
diff --git a/tests/test_tipc/llm/qwen2_5/benchmark_common/benchmark_json/qwen-qwen2_5-72b/dpo.json b/tests/test_tipc/llm/qwen2_5/benchmark_common/benchmark_json/qwen-qwen2_5-72b/dpo.json
index d6e41e0bc52e..2df563143693 100644
--- a/tests/test_tipc/llm/qwen2_5/benchmark_common/benchmark_json/qwen-qwen2_5-72b/dpo.json
+++ b/tests/test_tipc/llm/qwen2_5/benchmark_common/benchmark_json/qwen-qwen2_5-72b/dpo.json
@@ -4,7 +4,7 @@
     "dev_dataset_path": "./data/dpo_benchmark_train/dev.json",
     "output_dir": "./checkpoints/dpo_ckpts",
     "per_device_train_batch_size": 1,
-    "gradient_accumulation_steps": 4,
+    "gradient_accumulation_steps": 32,
     "per_device_eval_batch_size": 1,
     "num_train_epochs": 1,
     "learning_rate": 1e-06,
@@ -22,11 +22,11 @@
     "load_best_model_at_end": true,
     "tensor_parallel_output": true,
     "tensor_parallel_degree": 8,
-    "sharding_parallel_degree": 4,
+    "pipeline_parallel_degree": 4,
     "sharding": "stage2",
     "use_flash_attention": true,
     "flash_mask": true,
-    "recompute": false,
+    "recompute": true,
     "recompute_granularity": "full",
     "benchmark": true,
     "unified_checkpoint": true,
diff --git a/tests/test_tipc/llm/qwen2_5/benchmark_common/benchmark_json/qwen-qwen2_5-72b/lora.json b/tests/test_tipc/llm/qwen2_5/benchmark_common/benchmark_json/qwen-qwen2_5-72b/lora.json
index e340c7258ac4..21815924487a 100644
--- a/tests/test_tipc/llm/qwen2_5/benchmark_common/benchmark_json/qwen-qwen2_5-72b/lora.json
+++ b/tests/test_tipc/llm/qwen2_5/benchmark_common/benchmark_json/qwen-qwen2_5-72b/lora.json
@@ -30,7 +30,7 @@
     "load_best_model_at_end": true,
     "eval_with_do_generation": false,
     "metric_for_best_model": "accuracy",
-    "recompute": false,
+    "recompute": true,
     "recompute_granularity": "full",
     "save_total_limit": 1,
     "tensor_parallel_degree": 8,
diff --git a/tests/test_tipc/llm/qwen2_5/benchmark_common/benchmark_json/qwen-qwen2_5-72b/sft.json b/tests/test_tipc/llm/qwen2_5/benchmark_common/benchmark_json/qwen-qwen2_5-72b/sft.json
index 4f901cd74ca4..4e3e43b7a948 100644
--- a/tests/test_tipc/llm/qwen2_5/benchmark_common/benchmark_json/qwen-qwen2_5-72b/sft.json
+++ b/tests/test_tipc/llm/qwen2_5/benchmark_common/benchmark_json/qwen-qwen2_5-72b/sft.json
@@ -30,7 +30,7 @@
     "load_best_model_at_end": true,
     "eval_with_do_generation": false,
     "metric_for_best_model": "accuracy",
-    "recompute": false,
+    "recompute": true,
     "recompute_granularity": "full",
     "save_total_limit": 1,
     "benchmark": true,
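Note on the renamed N4C32 configurations above: the new layouts set tensor_parallel_degree=8 and pipeline_parallel_degree=4 across 4 nodes x 8 GPUs. Below is a minimal shell sketch, not part of the PR, for checking that a chosen set of degrees multiplies into the available cards; the variable names and the 32-card assumption are illustrative only, and the quotient is read as the remaining data/sharding replica count.

# Sanity-check sketch (illustrative values; N4C32 assumed to mean 4 nodes x 8 GPUs = 32 cards).
tp=8; pp=4; sd=1; cards=32
product=$((tp * pp * sd))
if [ $((cards % product)) -eq 0 ]; then
    # The leftover factor is how many model replicas the data/sharding dimension gets.
    echo "valid layout: replicas=$((cards / product))"
else
    echo "invalid layout: tp*pp*sd=$product does not divide $cards cards" >&2
fi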