diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index 6931659dbc691..d96e3c6d192e2 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -226,3 +226,4 @@ steps:
   - pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.0.5/flashinfer-0.0.5+cu121torch2.3-cp310-cp310-linux_x86_64.whl
   - VLLM_ATTENTION_BACKEND=FLASHINFER TEST_DIST_MODEL=facebook/opt-125m DISTRIBUTED_EXECUTOR_BACKEND=ray pytest -v -s distributed/test_basic_distributed_correctness.py
   - VLLM_ATTENTION_BACKEND=FLASHINFER TEST_DIST_MODEL=meta-llama/Meta-Llama-3-8B DISTRIBUTED_EXECUTOR_BACKEND=ray pytest -v -s distributed/test_basic_distributed_correctness.py
+  - pytest -v -s -x lora/test_mixtral.py
diff --git a/tests/lora/conftest.py b/tests/lora/conftest.py
index 4eab73a71071c..bda123bf13139 100644
--- a/tests/lora/conftest.py
+++ b/tests/lora/conftest.py
@@ -165,7 +165,9 @@ def sql_lora_files():
 
 @pytest.fixture(scope="session")
 def mixtral_lora_files():
-    return snapshot_download(repo_id="terrysun/mixtral-lora-adapter")
+    # Note: this module has incorrect adapter_config.json to test
+    # https://github.com/vllm-project/vllm/pull/5909/files.
+    return snapshot_download(repo_id="SangBinCho/mixtral-lora")
 
 
 @pytest.fixture(scope="session")
diff --git a/tests/lora/test_mixtral.py b/tests/lora/test_mixtral.py
index e7e7724fcec56..b5b4a79eb9567 100644
--- a/tests/lora/test_mixtral.py
+++ b/tests/lora/test_mixtral.py
@@ -40,14 +40,14 @@ def test_mixtral_lora(mixtral_lora_files, tp_size):
                    enable_lora=True,
                    max_num_seqs=16,
                    max_loras=4,
+                   distributed_executor_backend="ray",
                    tensor_parallel_size=tp_size)
 
     expected_lora_output = [
         "give_opinion(name[SpellForce 3], release_year[2017], developer[Grimlore Games], rating[poor])",  # noqa: E501
-        "give_opinion(name[SpellForce 3], release_year[2017], developer[Grimlore Games], rating[poor])",  # noqa: E501
+        "give_opinion(name[SpellForce 3], developer[Grimlore Games], release_year[2017], rating[poor])",  # noqa: E501
         "inform(name[BioShock], release_year[2007], rating[good], genres[action-adventure, role-playing, shooter], platforms[PlayStation, Xbox, PC], available_on_steam[yes], has_linux_release[no], has_mac_release[yes])",  # noqa: E501
     ]
-
     assert do_sample(llm, mixtral_lora_files, lora_id=1) == expected_lora_output
     assert do_sample(llm,
                      mixtral_lora_files,
diff --git a/vllm/lora/models.py b/vllm/lora/models.py
index 0a1fc7c021781..689835def83dd 100644
--- a/vllm/lora/models.py
+++ b/vllm/lora/models.py
@@ -303,25 +303,54 @@ def from_local_checkpoint(
                                                     "new_embeddings.bin")
         with open(lora_config_path) as f:
             config = json.load(f)
-        target_modules = config["target_modules"]
-        unexpected_modules = []
-        for module in target_modules:
-            # Compatible with more modules, such as:layers.11.self_attn.k_proj
-            part_name = module.split(".")[-1]
-            if part_name not in expected_lora_modules:
-                unexpected_modules.append(module)
-        # loaded lora's target modules must be a subset of expected_lora_modules
-
-        if unexpected_modules:
-            print(unexpected_modules, "modules")
-            raise ValueError(
-                f"While loading {lora_dir}, expected"
-                f" target modules in {expected_lora_modules}"
-                f" but received {unexpected_modules}."
-                f" Please verify that the loaded LoRA module is correct")
         if os.path.isfile(lora_tensor_path):
-            tensors = safetensors.torch.load_file(lora_tensor_path)
+            tensors: Dict[str, torch.Tensor] = {}
+            # Find unexpected modules.
+            # Use safetensor key as a source of truth to find expected modules.
+            # in peft if you have target_modules A, B, C and C does not exist
+            # in the model it won’t error and model will be trained with A, B
+            # loraified. C won’t exist in the safetensor but it will exist in
+            # the target_modules of the adapter_config.json.
+            unexpected_modules = []
+            with safetensors.safe_open(lora_tensor_path,
+                                       framework="pt") as f:  # type: ignore
+                for lora_module in f.keys():  # noqa
+                    module_name, _ = parse_fine_tuned_lora_name(lora_module)
+                    part_name = module_name.split(".")[-1]
+                    if part_name not in expected_lora_modules:
+                        unexpected_modules.append(module_name)
+                if unexpected_modules:
+                    raise ValueError(
+                        f"While loading {lora_dir}, expected"
+                        f" target modules in {expected_lora_modules}"
+                        f" but received {unexpected_modules}."
+                        f" Please verify that the loaded LoRA module is correct"
+                    )
+                # Load tensors if there are only expected modules.
+                for module in f.keys():  # noqa
+                    tensors[module] = f.get_tensor(module)
         elif os.path.isfile(lora_bin_file_path):
+            # When a bin file is provided, we rely on config to find unexpected
+            # modules.
+            unexpected_modules = []
+            target_modules = config["target_modules"]
+            for module in target_modules:
+                # Compatible with more modules,
+                # such as:layers.11.self_attn.k_proj
+                part_name = module.split(".")[-1]
+                if part_name not in expected_lora_modules:
+                    unexpected_modules.append(module)
+            # loaded lora's target modules must be a subset of
+            # expected_lora_modules. It is not reliable. See
+            # https://github.com/vllm-project/vllm/pull/5909. But there's no
+            # other better mechanism.
+            if unexpected_modules:
+                print(unexpected_modules, "modules")
+                raise ValueError(
+                    f"While loading {lora_dir}, expected"
+                    f" target modules in {expected_lora_modules}"
+                    f" but received {unexpected_modules}."
+                    f" Please verify that the loaded LoRA module is correct")
             tensors = torch.load(lora_bin_file_path)
         else:
             raise ValueError(f"{lora_dir} doesn't contain tensors")