diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py
index dfe83ddb731d4..4f4e79d10a677 100644
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@@ -10,12 +10,16 @@
 from vllm.envs import VLLM_USE_MODELSCOPE
 from vllm.logger import init_logger
+# yapf conflicts with isort for this block
+# yapf: disable
 from vllm.transformers_utils.configs import (ChatGLMConfig, DbrxConfig,
                                              EAGLEConfig, ExaoneConfig,
-                                             InternVLChatConfig, JAISConfig,
-                                             MedusaConfig, MLPSpeculatorConfig,
-                                             MPTConfig, NemotronConfig,
-                                             RWConfig, UltravoxConfig)
+                                             GraniteConfig, InternVLChatConfig,
+                                             JAISConfig, MedusaConfig,
+                                             MLPSpeculatorConfig, MPTConfig,
+                                             NemotronConfig, RWConfig,
+                                             UltravoxConfig)
+# yapf: enable
 from vllm.transformers_utils.utils import check_gguf_file
 
 if VLLM_USE_MODELSCOPE:
@@ -39,6 +43,9 @@
     "internvl_chat": InternVLChatConfig,
     "nemotron": NemotronConfig,
     "ultravox": UltravoxConfig,
+    # Granite can be removed from here once we have upgraded to
+    # transformers 4.45+
+    "granite": GraniteConfig,
 }
 
 for name, cls in _CONFIG_REGISTRY.items():
@@ -62,29 +69,36 @@ def get_config(
         kwargs["gguf_file"] = Path(model).name
         model = Path(model).parent
 
-    try:
-        config = AutoConfig.from_pretrained(
-            model,
-            trust_remote_code=trust_remote_code,
-            revision=revision,
-            code_revision=code_revision,
-            **kwargs)
-    except ValueError as e:
-        if (not trust_remote_code and
-                "requires you to execute the configuration file" in str(e)):
-            err_msg = (
-                "Failed to load the model config. If the model is a custom "
-                "model not yet available in the HuggingFace transformers "
-                "library, consider setting `trust_remote_code=True` in LLM "
-                "or using the `--trust-remote-code` flag in the CLI.")
-            raise RuntimeError(err_msg) from e
-        else:
-            raise e
-    if config.model_type in _CONFIG_REGISTRY:
-        config_class = _CONFIG_REGISTRY[config.model_type]
+    config_dict, _ = PretrainedConfig.get_config_dict(
+        model, revision=revision, code_revision=code_revision, **kwargs)
+
+    # Use custom model class if it's in our registry
+    model_type = config_dict.get("model_type")
+    if model_type in _CONFIG_REGISTRY:
+        config_class = _CONFIG_REGISTRY[model_type]
         config = config_class.from_pretrained(model,
                                               revision=revision,
                                               code_revision=code_revision)
+    else:
+        try:
+            config = AutoConfig.from_pretrained(
+                model,
+                trust_remote_code=trust_remote_code,
+                revision=revision,
+                code_revision=code_revision,
+                **kwargs)
+        except ValueError as e:
+            if (not trust_remote_code
+                    and "requires you to execute the configuration file"
+                    in str(e)):
+                err_msg = (
+                    "Failed to load the model config. If the model is a custom "
+                    "model not yet available in the HuggingFace transformers "
+                    "library, consider setting `trust_remote_code=True` in LLM "
+                    "or using the `--trust-remote-code` flag in the CLI.")
+                raise RuntimeError(err_msg) from e
+            else:
+                raise e
 
     # Special architecture mapping check for GGUF models
     if is_gguf:
diff --git a/vllm/transformers_utils/configs/__init__.py b/vllm/transformers_utils/configs/__init__.py
index 736878b35ad49..8381c5227584e 100644
--- a/vllm/transformers_utils/configs/__init__.py
+++ b/vllm/transformers_utils/configs/__init__.py
@@ -6,6 +6,7 @@
 # tiiuae/falcon-7b(-instruct) models. Newer Falcon models will use the
 # `FalconConfig` class from the official HuggingFace transformers library.
 from vllm.transformers_utils.configs.falcon import RWConfig
+from vllm.transformers_utils.configs.granite import GraniteConfig
 from vllm.transformers_utils.configs.internvl import InternVLChatConfig
 from vllm.transformers_utils.configs.jais import JAISConfig
 from vllm.transformers_utils.configs.medusa import MedusaConfig
@@ -27,4 +28,7 @@
     "MLPSpeculatorConfig",
     "NemotronConfig",
     "UltravoxConfig",
+    # Granite can be removed from here once we have upgraded to
+    # transformers 4.45+
+    "GraniteConfig",
 ]
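
Note on the new control flow in get_config: instead of calling AutoConfig.from_pretrained first and only consulting _CONFIG_REGISTRY afterwards, the patch peeks at the raw config.json via PretrainedConfig.get_config_dict and dispatches on model_type before transformers has a chance to reject an unknown architecture. That is what lets the registry's GraniteConfig take effect on transformers versions older than 4.45. Below is a minimal standalone sketch of the same resolution order; the resolve_config helper and the one-entry registry are illustrative only, not part of the patch:

from transformers import AutoConfig, PretrainedConfig

from vllm.transformers_utils.configs import GraniteConfig

# Illustrative registry; the real _CONFIG_REGISTRY in config.py maps
# many more model types to vLLM-local config classes.
_CONFIG_REGISTRY = {"granite": GraniteConfig}


def resolve_config(model: str, trust_remote_code: bool = False, **kwargs):
    # Read config.json as a plain dict; this succeeds even when the
    # installed transformers has no config class for the model type.
    config_dict, _ = PretrainedConfig.get_config_dict(model, **kwargs)
    model_type = config_dict.get("model_type")
    if model_type in _CONFIG_REGISTRY:
        # Out-of-tree model type: use vLLM's own config class.
        return _CONFIG_REGISTRY[model_type].from_pretrained(model, **kwargs)
    # Known (or remote-code) model type: fall back to AutoConfig as before.
    return AutoConfig.from_pretrained(
        model, trust_remote_code=trust_remote_code, **kwargs)

One behavioral consequence worth noting for review: registry hits now bypass AutoConfig entirely, so the trust_remote_code error handling applies only on the fallback path.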