From 3c15fb9fbf388c1ad5c0d4e8e2192822ea860926 Mon Sep 17 00:00:00 2001 From: hnyls2002 Date: Thu, 25 Apr 2024 08:24:16 +0000 Subject: [PATCH] fix: only accept a prefix of waiting queue --- python/sglang/srt/managers/router/model_rpc.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/sglang/srt/managers/router/model_rpc.py b/python/sglang/srt/managers/router/model_rpc.py index 883bb12f91..0ca46c854a 100644 --- a/python/sglang/srt/managers/router/model_rpc.py +++ b/python/sglang/srt/managers/router/model_rpc.py @@ -348,6 +348,7 @@ def get_new_fill_batch(self): # Undo the insertion delta = self.tree_cache.dec_ref_counter(req.last_node) available_size += delta + break else: # Add this request to the running batch self.token_to_kv_pool.add_refs(req.prefix_indices) @@ -356,7 +357,8 @@ def get_new_fill_batch(self): req.extend_input_len + req.max_new_tokens() ) new_batch_input_tokens += req.extend_input_len - + else: + break if len(can_run_list) == 0: return None