diff --git a/gptqmodel/utils/vllm.py b/gptqmodel/utils/vllm.py
index a5e9be2df..8cad116d1 100644
--- a/gptqmodel/utils/vllm.py
+++ b/gptqmodel/utils/vllm.py
@@ -24,8 +24,6 @@ def convert_hf_params_to_vllm(hf_params: Dict[str, Any]):
         'top_p': hf_params.get('top_p', 1.0),
         'max_tokens': hf_params.get('max_length', 16),
         'min_tokens': hf_params.get('min_length', 0),
-        'early_stopping': hf_params.get('early_stopping', False),
-        'length_penalty': hf_params.get('length_penalty', 1.0),
         'stop_token_ids': [hf_params.get('eos_token_id'), None],
     }
     return SamplingParams(**params)
@@ -58,8 +56,7 @@ def vllm_generate(
     if not isinstance(sampling_params, SamplingParams):
         hf_params = {key: kwargs[key] for key in [
             'num_return_sequences', 'repetition_penalty', 'temperature',
-            'top_k', 'top_p', 'max_length', 'min_length',
-            'early_stopping', 'length_penalty', 'eos_token_id'
+            'top_k', 'top_p', 'max_length', 'min_length', 'eos_token_id'
         ] if key in kwargs}
 
         sampling_params = convert_hf_params_to_vllm(hf_params)