Skip to content

Commit

Permalink
[Misc] Move device options to a single place (#8322)
Browse files Browse the repository at this point in the history
  • Loading branch information
akx authored Sep 11, 2024
1 parent 73202db commit 8baa454
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 20 deletions.
14 changes: 6 additions & 8 deletions benchmarks/benchmark_latency.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from tqdm import tqdm

from vllm import LLM, SamplingParams
from vllm.engine.arg_utils import EngineArgs
from vllm.engine.arg_utils import DEVICE_OPTIONS, EngineArgs
from vllm.inputs import PromptInputs
from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS
from vllm.utils import FlexibleArgumentParser
Expand Down Expand Up @@ -205,13 +205,11 @@ def run_to_completion(profile_dir: Optional[str] = None):
default=None,
help=('path to save the pytorch profiler output. Can be visualized '
'with ui.perfetto.dev or Tensorboard.'))
parser.add_argument(
"--device",
type=str,
default="auto",
choices=["auto", "cuda", "cpu", "openvino", "tpu", "xpu"],
help='device type for vLLM execution, supporting CUDA, OpenVINO and '
'CPU.')
parser.add_argument("--device",
type=str,
default="auto",
choices=DEVICE_OPTIONS,
help='device type for vLLM execution')
parser.add_argument('--block-size',
type=int,
default=16,
Expand Down
14 changes: 6 additions & 8 deletions benchmarks/benchmark_throughput.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from transformers import (AutoModelForCausalLM, AutoTokenizer,
PreTrainedTokenizerBase)

from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs
from vllm.engine.arg_utils import DEVICE_OPTIONS, AsyncEngineArgs, EngineArgs
from vllm.entrypoints.openai.api_server import (
build_async_engine_client_from_engine_args)
from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS
Expand Down Expand Up @@ -451,13 +451,11 @@ def main(args: argparse.Namespace):
'accuracy issues. FP8_E5M2 (without scaling) is only supported on '
'cuda version greater than 11.8. On ROCm (AMD GPU), FP8_E4M3 is '
'instead supported for common inference criteria.')
parser.add_argument(
"--device",
type=str,
default="auto",
choices=["auto", "cuda", "cpu", "openvino", "tpu", "xpu"],
help='device type for vLLM execution, supporting CUDA, OpenVINO and '
'CPU.')
parser.add_argument("--device",
type=str,
default="auto",
choices=DEVICE_OPTIONS,
help='device type for vLLM execution')
parser.add_argument(
"--num-scheduler-steps",
type=int,
Expand Down
15 changes: 11 additions & 4 deletions vllm/engine/arg_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,16 @@

ALLOWED_DETAILED_TRACE_MODULES = ["model", "worker", "all"]

DEVICE_OPTIONS = [
"auto",
"cuda",
"neuron",
"cpu",
"openvino",
"tpu",
"xpu",
]


def nullable_str(val: str):
if not val or val == "None":
Expand Down Expand Up @@ -553,10 +563,7 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
parser.add_argument("--device",
type=str,
default=EngineArgs.device,
choices=[
"auto", "cuda", "neuron", "cpu", "openvino",
"tpu", "xpu"
],
choices=DEVICE_OPTIONS,
help='Device type for vLLM execution.')
parser.add_argument('--num-scheduler-steps',
type=int,
Expand Down

0 comments on commit 8baa454

Please sign in to comment.