diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py
index 71798ab7d17c0..91ab872f7428d 100644
--- a/vllm/engine/llm_engine.py
+++ b/vllm/engine/llm_engine.py
@@ -169,6 +169,18 @@ def _init_tokenizer(self, **tokenizer_init_kwargs):
         self.tokenizer: BaseTokenizerGroup = get_tokenizer_group(
             self.parallel_config.tokenizer_pool_config, **init_kwargs)
 
+        # Look both sizes up once. A mismatch is only logged as a warning
+        # here; nothing is raised and initialization continues.
+        tokenizer_vocab_size = len(self.get_tokenizer())
+        model_vocab_size = self.model_config.get_vocab_size()
+        if tokenizer_vocab_size != model_vocab_size:
+            logger.warning(
+                "The tokenizer's vocabulary size %s does not match the "
+                "model's vocabulary size %s. This might cause an error in "
+                "decoding. Please change config.json to match the "
+                "tokenizer's vocabulary size.", tokenizer_vocab_size,
+                model_vocab_size)
+
     def _verify_args(self) -> None:
         self.model_config.verify_with_parallel_config(self.parallel_config)
         self.cache_config.verify_with_parallel_config(self.parallel_config)
diff --git a/vllm/entrypoints/openai/serving_engine.py b/vllm/entrypoints/openai/serving_engine.py
index 2db884945c491..976046beec245 100644
--- a/vllm/entrypoints/openai/serving_engine.py
+++ b/vllm/entrypoints/openai/serving_engine.py
@@ -68,6 +68,18 @@ async def _post_init(self):
             tokenizer_mode=engine_model_config.tokenizer_mode,
             trust_remote_code=engine_model_config.trust_remote_code)
 
+        # Look both sizes up once. A mismatch is only logged as a warning
+        # here; nothing is raised and initialization continues.
+        tokenizer_vocab_size = len(self.tokenizer)
+        model_vocab_size = engine_model_config.get_vocab_size()
+        if tokenizer_vocab_size != model_vocab_size:
+            logger.warning(
+                "The tokenizer's vocabulary size %s does not match the "
+                "model's vocabulary size %s. This might cause an error in "
+                "decoding. Please change config.json to match the "
+                "tokenizer's vocabulary size.", tokenizer_vocab_size,
+                model_vocab_size)
+
     async def show_available_models(self) -> ModelList:
         """Show available models. Right now we only have one model."""
         model_cards = [