From e440b8f07579eaac560a98c08317783fd5bae967 Mon Sep 17 00:00:00 2001
From: Lianmin Zheng
Date: Wed, 27 Nov 2024 23:27:31 -0800
Subject: [PATCH] fix chunked prefill

---
 python/sglang/srt/server_args.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py
index 57f918e611..144ade58ea 100644
--- a/python/sglang/srt/server_args.py
+++ b/python/sglang/srt/server_args.py
@@ -144,7 +144,7 @@ def __post_init__(self):
         if self.served_model_name is None:
             self.served_model_name = self.model_path
 
-        if self.chunked_prefill_size <= 0:
+        if self.chunked_prefill_size is not None and self.chunked_prefill_size <= 0:
             # Disable chunked prefill
             self.chunked_prefill_size = None
 
@@ -199,10 +199,6 @@ def __post_init__(self):
                 "Overlap schedule is disabled."
             )
 
-        # Model-specific changes
-        if "Qwen2-VL-" in self.model_path:
-            self.disable_overlap_schedule = True
-
     @staticmethod
     def add_cli_args(parser: argparse.ArgumentParser):
         # Model and port args