Skip to content

Commit

Permalink
[Speculative decoding] Add serving benchmark for llama3 70b + speculative decoding (vllm-project#6964)
Browse files Browse the repository at this point in the history
  • Loading branch information
cadedaniel authored Jul 31, 2024
1 parent fb4f530 commit c32ab8b
Showing 1 changed file with 22 additions and 1 deletion.
23 changes: 22 additions & 1 deletion .buildkite/nightly-benchmarks/tests/serving-tests.json
Original file line number Diff line number Diff line change
Expand Up @@ -55,5 +55,26 @@
"dataset_path": "./ShareGPT_V3_unfiltered_cleaned_split.json",
"num_prompts": 200
}
},
{
"test_name": "serving_llama70B_tp4_sharegpt_specdecode",
"qps_list": [2],
"server_parameters": {
"model": "meta-llama/Meta-Llama-3-70B-Instruct",
"disable_log_requests": "",
"tensor_parallel_size": 4,
"swap_space": 16,
"speculative_model": "turboderp/Qwama-0.5B-Instruct",
"num_speculative_tokens": 4,
"speculative_draft_tensor_parallel_size": 1,
"use_v2_block_manager": ""
},
"client_parameters": {
"model": "meta-llama/Meta-Llama-3-70B-Instruct",
"backend": "vllm",
"dataset_name": "sharegpt",
"dataset_path": "./ShareGPT_V3_unfiltered_cleaned_split.json",
"num_prompts": 200
}
}
]
]

0 comments on commit c32ab8b

Please sign in to comment.