From 24862bce504479415620a0b7cff9216b1ca8c76b Mon Sep 17 00:00:00 2001
From: Max de Bayser
Date: Wed, 21 Aug 2024 14:50:44 -0300
Subject: [PATCH 1/8] Validate that the input prompts aren't empty

This avoids an async loop crash that takes down the server.

Signed-off-by: Max de Bayser
---
 vllm/engine/async_llm_engine.py |  2 ++
 vllm/engine/llm_engine.py       | 12 ++++++++++++
 2 files changed, 14 insertions(+)

diff --git a/vllm/engine/async_llm_engine.py b/vllm/engine/async_llm_engine.py
index 9911cc9bdd84f..8686aceb0fdc8 100644
--- a/vllm/engine/async_llm_engine.py
+++ b/vllm/engine/async_llm_engine.py
@@ -543,6 +543,7 @@ async def process_model_inputs_async(
                 inputs,
                 request_id=request_id,
             )
+            self._validate_enc_dec_inputs(model_inputs)
         else:
             if is_explicit_encoder_decoder_prompt(inputs):
                 raise ValueError("Cannot pass encoder-decoder prompt "
@@ -555,6 +556,7 @@ async def process_model_inputs_async(
                 lora_request=lora_request,
                 prompt_adapter_request=prompt_adapter_request,
             )
+            self._validate_dec_only_inputs(model_inputs)
 
         return self.input_processor(model_inputs)
diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py
index 94aed6b8c50c7..57fbb30234d4a 100644
--- a/vllm/engine/llm_engine.py
+++ b/vllm/engine/llm_engine.py
@@ -957,6 +957,7 @@ def process_model_inputs(
                 inputs,
                 request_id=request_id,
             )
+            self._validate_enc_dec_inputs(model_inputs)
         else:
             if is_explicit_encoder_decoder_prompt(inputs):
                 raise ValueError("Cannot pass encoder-decoder prompt "
@@ -969,6 +970,7 @@ def process_model_inputs(
                 lora_request=lora_request,
                 prompt_adapter_request=prompt_adapter_request,
             )
+            self._validate_dec_only_inputs(model_inputs)
 
         return self.input_processor(model_inputs)
@@ -1642,3 +1644,13 @@ def is_encoder_decoder_model(self):
 
     def is_embedding_model(self):
         return self.model_config.is_embedding_model
+
+    def _validate_dec_only_inputs(self, inputs: LLMInputs):
+        if "prompt_token_ids" not in inputs or len(
+                inputs["prompt_token_ids"]) == 0:
+            raise ValueError("Empty prompt")
+
+    def _validate_enc_dec_inputs(self, inputs: EncoderDecoderLLMInputs):
+        if "encoder_prompt_token_ids" not in inputs or\
+            len(inputs["encoder_prompt_token_ids"]) == 0:
+            raise ValueError("Empty prompt")

From 9a13407b637905be603cce6ae05eea6ac8c4df39 Mon Sep 17 00:00:00 2001
From: Max de Bayser
Date: Thu, 22 Aug 2024 13:30:47 -0300
Subject: [PATCH 2/8] Move validation of empty prompt to add_request functions

Also add unit tests for LLM and OpenAI entrypoints.

Signed-off-by: Max de Bayser
---
 tests/entrypoints/llm/test_generate.py      |  6 ++++++
 tests/entrypoints/openai/test_completion.py | 15 +++++++++++++
 vllm/engine/async_llm_engine.py             |  3 +--
 vllm/engine/llm_engine.py                   | 24 ++++++++++-----------
 4 files changed, 33 insertions(+), 15 deletions(-)

diff --git a/tests/entrypoints/llm/test_generate.py b/tests/entrypoints/llm/test_generate.py
index c426e9b4ee899..9df11df98aa93 100644
--- a/tests/entrypoints/llm/test_generate.py
+++ b/tests/entrypoints/llm/test_generate.py
@@ -142,6 +142,12 @@ def test_multiple_sampling_params(llm: LLM):
     assert len(PROMPTS) == len(outputs)
 
 
+def test_empty_prompt():
+    llm = LLM(model="gpt2")
+    with pytest.raises(ValueError, match='Empty prompt'):
+        llm.generate([""])
+
+
 def test_chat():
 
     llm = LLM(model="meta-llama/Meta-Llama-3-8B-Instruct")
diff --git a/tests/entrypoints/openai/test_completion.py b/tests/entrypoints/openai/test_completion.py
index 18f41f5fc671b..aed21af694cd7 100644
--- a/tests/entrypoints/openai/test_completion.py
+++ b/tests/entrypoints/openai/test_completion.py
@@ -738,3 +738,18 @@ async def test_guided_decoding_type_error(client: openai.AsyncOpenAI,
         prompt="Give an example string that fits this regex",
         extra_body=dict(guided_regex=sample_regex,
                         guided_json=sample_json_schema))
+
+
+@pytest.mark.asyncio
+async def test_empty_prompt():
+    model_name = "gpt2"
+    server_args = ["--disable-frontend-multiprocessing", "--enforce-eager"]
+    with RemoteOpenAIServer(model_name, server_args) as remote_server:
+        client = remote_server.get_async_client()
+
+        with pytest.raises(openai.BadRequestError,
+                           match=re.compile('.+Empty prompt.+')):
+            await client.completions.create(model=model_name,
+                                            prompt="",
+                                            max_tokens=5,
+                                            temperature=0.0)
diff --git a/vllm/engine/async_llm_engine.py b/vllm/engine/async_llm_engine.py
index 4e1d2d1bf9619..d77fb96b9432e 100644
--- a/vllm/engine/async_llm_engine.py
+++ b/vllm/engine/async_llm_engine.py
@@ -543,7 +543,6 @@ async def process_model_inputs_async(
                 inputs,
                 request_id=request_id,
             )
-            self._validate_enc_dec_inputs(model_inputs)
         else:
             if is_explicit_encoder_decoder_prompt(inputs):
                 raise ValueError("Cannot pass encoder-decoder prompt "
@@ -556,7 +555,6 @@ async def process_model_inputs_async(
                 lora_request=lora_request,
                 prompt_adapter_request=prompt_adapter_request,
             )
-            self._validate_dec_only_inputs(model_inputs)
 
         return self.input_processor(model_inputs)
 
@@ -583,6 +581,7 @@ async def add_request_async(
             lora_request=lora_request,
             prompt_adapter_request=prompt_adapter_request,
         )
+        self._validate_model_inputs(processed_inputs)
 
         self._add_processed_request(
             request_id=request_id,
diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py
index 1e100d24f9109..fd9445eaff0ce 100644
--- a/vllm/engine/llm_engine.py
+++ b/vllm/engine/llm_engine.py
@@ -900,10 +900,8 @@ def _build_decoder_only_llm_inputs(
         prompt_adapter_request: Optional[PromptAdapterRequest],
     ) -> LLMInputs:
         prompt, prompt_token_ids, multi_modal_data = prompt_comps
-
         prompt_token_ids = self._apply_prompt_adapter(
             prompt_token_ids, prompt_adapter_request=prompt_adapter_request)
-
         return LLMInputs(prompt_token_ids=prompt_token_ids,
                          prompt=prompt,
                          multi_modal_data=multi_modal_data)
@@ -957,7 +955,6 @@ def process_model_inputs(
                 inputs,
                 request_id=request_id,
             )
-            self._validate_enc_dec_inputs(model_inputs)
         else:
             if is_explicit_encoder_decoder_prompt(inputs):
                 raise ValueError("Cannot pass encoder-decoder prompt "
@@ -970,7 +967,6 @@ def process_model_inputs(
                 lora_request=lora_request,
                 prompt_adapter_request=prompt_adapter_request,
             )
-            self._validate_dec_only_inputs(model_inputs)
 
         return self.input_processor(model_inputs)
 
@@ -1038,6 +1034,7 @@ def add_request(
             lora_request=lora_request,
             prompt_adapter_request=prompt_adapter_request,
         )
+        self._validate_model_inputs(processed_inputs)
 
         self._add_processed_request(
             request_id=request_id,
@@ -1650,12 +1647,13 @@ def is_encoder_decoder_model(self):
     def is_embedding_model(self):
         return self.model_config.is_embedding_model
 
-    def _validate_dec_only_inputs(self, inputs: LLMInputs):
-        if "prompt_token_ids" not in inputs or len(
-                inputs["prompt_token_ids"]) == 0:
-            raise ValueError("Empty prompt")
-
-    def _validate_enc_dec_inputs(self, inputs: EncoderDecoderLLMInputs):
-        if "encoder_prompt_token_ids" not in inputs or\
-            len(inputs["encoder_prompt_token_ids"]) == 0:
-            raise ValueError("Empty prompt")
+    def _validate_model_inputs(self, inputs: Union[LLMInputs,
+                                                   EncoderDecoderLLMInputs]):
+        if self.is_encoder_decoder_model():
+            if "encoder_prompt_token_ids" not in inputs or\
+                len(inputs["encoder_prompt_token_ids"]) == 0:
+                raise ValueError("Empty prompt")
+        else:
+            if "prompt_token_ids" not in inputs or len(
+                    inputs["prompt_token_ids"]) == 0:
+                raise ValueError("Empty prompt")

From 85ee9ff94094eb318794736e57fb54397e656589 Mon Sep 17 00:00:00 2001
From: Max de Bayser
Date: Thu, 22 Aug 2024 16:19:42 -0300
Subject: [PATCH 3/8] move test to another file due to conflicting fixtures

Signed-off-by: Max de Bayser
---
 tests/entrypoints/openai/test_completion.py | 15 -------------
 .../openai/test_prompt_validation.py        | 22 +++++++++++++++++++
 2 files changed, 22 insertions(+), 15 deletions(-)
 create mode 100644 tests/entrypoints/openai/test_prompt_validation.py

diff --git a/tests/entrypoints/openai/test_completion.py b/tests/entrypoints/openai/test_completion.py
index aed21af694cd7..18f41f5fc671b 100644
--- a/tests/entrypoints/openai/test_completion.py
+++ b/tests/entrypoints/openai/test_completion.py
@@ -738,18 +738,3 @@ async def test_guided_decoding_type_error(client: openai.AsyncOpenAI,
         prompt="Give an example string that fits this regex",
         extra_body=dict(guided_regex=sample_regex,
                         guided_json=sample_json_schema))
-
-
-@pytest.mark.asyncio
-async def test_empty_prompt():
-    model_name = "gpt2"
-    server_args = ["--disable-frontend-multiprocessing", "--enforce-eager"]
-    with RemoteOpenAIServer(model_name, server_args) as remote_server:
-        client = remote_server.get_async_client()
-
-        with pytest.raises(openai.BadRequestError,
-                           match=re.compile('.+Empty prompt.+')):
-            await client.completions.create(model=model_name,
-                                            prompt="",
-                                            max_tokens=5,
-                                            temperature=0.0)
diff --git a/tests/entrypoints/openai/test_prompt_validation.py b/tests/entrypoints/openai/test_prompt_validation.py
new file mode 100644
index 0000000000000..a63c693c21ce6
--- /dev/null
+++ b/tests/entrypoints/openai/test_prompt_validation.py
@@ -0,0 +1,22 @@
+# imports for guided decoding tests
+import re
+
+import openai
+import pytest
+
+from ...utils import RemoteOpenAIServer
+
+
+@pytest.mark.asyncio
+async def test_empty_prompt():
+    model_name = "gpt2"
+    server_args = ["--disable-frontend-multiprocessing", "--enforce-eager"]
+    with RemoteOpenAIServer(model_name, server_args) as remote_server:
+        client = remote_server.get_async_client()
+
+        with pytest.raises(openai.BadRequestError,
+                           match=re.compile('.+Empty prompt.+')):
+            await client.completions.create(model=model_name,
+                                            prompt="",
+                                            max_tokens=5,
+                                            temperature=0.0)
\ No newline at end of file

From 4bef6f63c20e6b275c62789b181e977c39f508ae Mon Sep 17 00:00:00 2001
From: Max de Bayser
Date: Thu, 22 Aug 2024 16:45:34 -0300
Subject: [PATCH 4/8] enable frontend multiprocessing

Signed-off-by: Max de Bayser
---
 tests/entrypoints/openai/test_prompt_validation.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/entrypoints/openai/test_prompt_validation.py b/tests/entrypoints/openai/test_prompt_validation.py
index a63c693c21ce6..6b88d6cf44965 100644
--- a/tests/entrypoints/openai/test_prompt_validation.py
+++ b/tests/entrypoints/openai/test_prompt_validation.py
@@ -10,7 +10,7 @@
 @pytest.mark.asyncio
 async def test_empty_prompt():
     model_name = "gpt2"
-    server_args = ["--disable-frontend-multiprocessing", "--enforce-eager"]
+    server_args = ["--enforce-eager"]
     with RemoteOpenAIServer(model_name, server_args) as remote_server:
         client = remote_server.get_async_client()
 
         with pytest.raises(openai.BadRequestError,
@@ -19,4 +19,4 @@ async def test_empty_prompt():
             await client.completions.create(model=model_name,
                                             prompt="",
                                             max_tokens=5,
-                                            temperature=0.0)
\ No newline at end of file
+                                            temperature=0.0)

From aba94ee44cbcdcddad361267f286c0f5700735f1 Mon Sep 17 00:00:00 2001
From: Max de Bayser
Date: Thu, 22 Aug 2024 16:59:06 -0300
Subject: [PATCH 5/8] move test to another file due to conflicting fixtures

Signed-off-by: Max de Bayser
---
 tests/entrypoints/llm/test_generate.py          | 6 ------
 tests/entrypoints/llm/test_prompt_validation.py | 9 +++++++++
 2 files changed, 9 insertions(+), 6 deletions(-)
 create mode 100644 tests/entrypoints/llm/test_prompt_validation.py

diff --git a/tests/entrypoints/llm/test_generate.py b/tests/entrypoints/llm/test_generate.py
index 9df11df98aa93..c426e9b4ee899 100644
--- a/tests/entrypoints/llm/test_generate.py
+++ b/tests/entrypoints/llm/test_generate.py
@@ -142,12 +142,6 @@ def test_multiple_sampling_params(llm: LLM):
     assert len(PROMPTS) == len(outputs)
 
 
-def test_empty_prompt():
-    llm = LLM(model="gpt2")
-    with pytest.raises(ValueError, match='Empty prompt'):
-        llm.generate([""])
-
-
 def test_chat():
 
     llm = LLM(model="meta-llama/Meta-Llama-3-8B-Instruct")
diff --git a/tests/entrypoints/llm/test_prompt_validation.py b/tests/entrypoints/llm/test_prompt_validation.py
new file mode 100644
index 0000000000000..7b1466ae91dea
--- /dev/null
+++ b/tests/entrypoints/llm/test_prompt_validation.py
@@ -0,0 +1,9 @@
+import pytest
+
+from vllm import LLM
+
+
+def test_empty_prompt():
+    llm = LLM(model="gpt2")
+    with pytest.raises(ValueError, match='Empty prompt'):
+        llm.generate([""])

From 5f972adc82ff42460a0b9bbb6823f6a94a2be089 Mon Sep 17 00:00:00 2001
From: Max de Bayser
Date: Thu, 22 Aug 2024 17:18:30 -0300
Subject: [PATCH 6/8] move validation to a better place

Signed-off-by: Max de Bayser
---
 vllm/engine/async_llm_engine.py | 1 -
 vllm/engine/llm_engine.py       | 4 +++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/vllm/engine/async_llm_engine.py b/vllm/engine/async_llm_engine.py
index d77fb96b9432e..8812b853c0665 100644
--- a/vllm/engine/async_llm_engine.py
+++ b/vllm/engine/async_llm_engine.py
@@ -581,7 +581,6 @@ async def add_request_async(
             lora_request=lora_request,
             prompt_adapter_request=prompt_adapter_request,
         )
-        self._validate_model_inputs(processed_inputs)
 
         self._add_processed_request(
             request_id=request_id,
diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py
index fd9445eaff0ce..dc53f57574a8b 100644
--- a/vllm/engine/llm_engine.py
+++ b/vllm/engine/llm_engine.py
@@ -591,6 +591,7 @@ def _add_processed_request(
         prompt_adapter_request: Optional[PromptAdapterRequest],
         trace_headers: Optional[Mapping[str, str]] = None,
     ) -> None:
+        self._validate_model_inputs(processed_inputs)
         # Create the sequences.
         block_size = self.cache_config.block_size
         seq_id = next(self.seq_counter)
@@ -900,8 +901,10 @@ def _build_decoder_only_llm_inputs(
         prompt_adapter_request: Optional[PromptAdapterRequest],
     ) -> LLMInputs:
         prompt, prompt_token_ids, multi_modal_data = prompt_comps
+
         prompt_token_ids = self._apply_prompt_adapter(
             prompt_token_ids, prompt_adapter_request=prompt_adapter_request)
+
         return LLMInputs(prompt_token_ids=prompt_token_ids,
                          prompt=prompt,
                          multi_modal_data=multi_modal_data)
@@ -1034,7 +1037,6 @@ def add_request(
             lora_request=lora_request,
             prompt_adapter_request=prompt_adapter_request,
         )
-        self._validate_model_inputs(processed_inputs)
 
         self._add_processed_request(
             request_id=request_id,

From 4623dfd0891814b338ce2a1345ed7d4ed19dd2d0 Mon Sep 17 00:00:00 2001
From: Nick Hill
Date: Thu, 22 Aug 2024 19:10:03 -0400
Subject: [PATCH 7/8] Simplify prompt check

---
 vllm/engine/llm_engine.py | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py
index dc53f57574a8b..8c98b64181d06 100644
--- a/vllm/engine/llm_engine.py
+++ b/vllm/engine/llm_engine.py
@@ -1651,11 +1651,7 @@ def is_embedding_model(self):
 
     def _validate_model_inputs(self, inputs: Union[LLMInputs,
                                                    EncoderDecoderLLMInputs]):
-        if self.is_encoder_decoder_model():
-            if "encoder_prompt_token_ids" not in inputs or\
-                len(inputs["encoder_prompt_token_ids"]) == 0:
-                raise ValueError("Empty prompt")
-        else:
-            if "prompt_token_ids" not in inputs or len(
-                    inputs["prompt_token_ids"]) == 0:
-                raise ValueError("Empty prompt")
+        prompt_key = "encoder_prompt_token_ids" \
+            if self.is_encoder_decoder_model() else "prompt_token_ids"
+        if not inputs.get(prompt_key):
+            raise ValueError("Prompt cannot be empty")

From 68610cbe870be02cd640fbaaf5df83c4b658bb54 Mon Sep 17 00:00:00 2001
From: Nick Hill
Date: Thu, 22 Aug 2024 21:28:13 -0400
Subject: [PATCH 8/8] Make test match updated exception message

---
 tests/entrypoints/llm/test_prompt_validation.py    | 2 +-
 tests/entrypoints/openai/test_prompt_validation.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/entrypoints/llm/test_prompt_validation.py b/tests/entrypoints/llm/test_prompt_validation.py
index 7b1466ae91dea..565dfa01346cc 100644
--- a/tests/entrypoints/llm/test_prompt_validation.py
+++ b/tests/entrypoints/llm/test_prompt_validation.py
@@ -5,5 +5,5 @@
 
 def test_empty_prompt():
     llm = LLM(model="gpt2")
-    with pytest.raises(ValueError, match='Empty prompt'):
+    with pytest.raises(ValueError, match='Prompt cannot be empty'):
         llm.generate([""])
diff --git a/tests/entrypoints/openai/test_prompt_validation.py b/tests/entrypoints/openai/test_prompt_validation.py
index 6b88d6cf44965..0a573a0066d32 100644
--- a/tests/entrypoints/openai/test_prompt_validation.py
+++ b/tests/entrypoints/openai/test_prompt_validation.py
@@ -15,7 +15,7 @@ async def test_empty_prompt():
         client = remote_server.get_async_client()
 
         with pytest.raises(openai.BadRequestError,
-                           match=re.compile('.+Empty prompt.+')):
+                           match=re.compile('.+Prompt cannot be empty.+')):
             await client.completions.create(model=model_name,
                                             prompt="",
                                             max_tokens=5,
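For reference, the check that the series converges on (patches 6 and 7) can be exercised outside the engine. The sketch below mirrors the final _validate_model_inputs logic as a standalone function; the plain dict and the is_encoder_decoder flag are stand-ins for the engine's LLMInputs/EncoderDecoderLLMInputs TypedDicts and its model config, so this is an illustration of the behaviour rather than vLLM API:

    from typing import Any, Dict


    def validate_model_inputs(inputs: Dict[str, Any],
                              is_encoder_decoder: bool) -> None:
        # Mirrors LLMEngine._validate_model_inputs after PATCH 7/8:
        # encoder-decoder models carry the user prompt in
        # "encoder_prompt_token_ids", decoder-only models in "prompt_token_ids".
        prompt_key = ("encoder_prompt_token_ids"
                      if is_encoder_decoder else "prompt_token_ids")
        # A missing key or an empty token list is rejected up front, before the
        # request reaches the async engine loop, instead of crashing it later.
        if not inputs.get(prompt_key):
            raise ValueError("Prompt cannot be empty")


    if __name__ == "__main__":
        try:
            validate_model_inputs({"prompt": "", "prompt_token_ids": []},
                                  is_encoder_decoder=False)
        except ValueError as err:
            print(err)  # -> Prompt cannot be empty

Because the real call now sits in LLMEngine._add_processed_request (PATCH 6/8), both the LLM entrypoint and the OpenAI server surface this error for empty prompts, which is what the updated tests in PATCH 8/8 assert.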