diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 10549f0351da4..e8ad4af2d30ae 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -309,7 +309,7 @@ steps: - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s test_sharded_state_loader.py - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s distributed/test_utils.py -- label: Multi-step Tests (4 GPUs) # 20min +- label: Multi-step Tests (4 GPUs) # 10min working_dir: "/vllm-workspace/tests" num_gpus: 4 source_file_dependencies: diff --git a/tests/multi_step/test_correctness.py b/tests/multi_step/test_correctness.py index 40a5371fd2450..bc14311c66424 100644 --- a/tests/multi_step/test_correctness.py +++ b/tests/multi_step/test_correctness.py @@ -9,7 +9,7 @@ MODELS = [ "JackFram/llama-160m", ] -NUM_SCHEDULER_STEPS = [8, 16] # Multi-step decoding steps +NUM_SCHEDULER_STEPS = [8] # Multi-step decoding steps NUM_PROMPTS = [10] DEFAULT_SERVER_ARGS: List[str] = [ @@ -43,8 +43,6 @@ async def completions_with_server_args(prompts: List[str], model_name: str, @pytest.mark.parametrize(("tp_size, pp_size"), [ (1, 1), (2, 2), - (1, 2), - (2, 1), ]) @pytest.mark.parametrize("eager_mode", [False, True]) @pytest.mark.parametrize("num_scheduler_steps", NUM_SCHEDULER_STEPS)