diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index 10549f0351da4..e8ad4af2d30ae 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -309,7 +309,7 @@ steps:
   - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s test_sharded_state_loader.py
   - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s distributed/test_utils.py
 
-- label: Multi-step Tests (4 GPUs) # 20min
+- label: Multi-step Tests (4 GPUs) # 10min
   working_dir: "/vllm-workspace/tests"
   num_gpus: 4
   source_file_dependencies:
diff --git a/tests/multi_step/test_correctness.py b/tests/multi_step/test_correctness.py
index 40a5371fd2450..bc14311c66424 100644
--- a/tests/multi_step/test_correctness.py
+++ b/tests/multi_step/test_correctness.py
@@ -9,7 +9,7 @@
 MODELS = [
     "JackFram/llama-160m",
 ]
-NUM_SCHEDULER_STEPS = [8, 16]  # Multi-step decoding steps
+NUM_SCHEDULER_STEPS = [8]  # Multi-step decoding steps
 NUM_PROMPTS = [10]
 
 DEFAULT_SERVER_ARGS: List[str] = [
@@ -43,8 +43,6 @@ async def completions_with_server_args(prompts: List[str], model_name: str,
 @pytest.mark.parametrize(("tp_size, pp_size"), [
     (1, 1),
     (2, 2),
-    (1, 2),
-    (2, 1),
 ])
 @pytest.mark.parametrize("eager_mode", [False, True])
 @pytest.mark.parametrize("num_scheduler_steps", NUM_SCHEDULER_STEPS)