diff --git a/python/sglang/srt/managers/scheduler.py b/python/sglang/srt/managers/scheduler.py index 94d85c426ec..8405fdc7f68 100644 --- a/python/sglang/srt/managers/scheduler.py +++ b/python/sglang/srt/managers/scheduler.py @@ -460,6 +460,7 @@ def get_next_batch_to_run(self): current_inflight_req=self.current_inflight_req ) self.tree_cache.cache_unfinished_req(self.current_inflight_req) + # Inflight request keeps its rid but will get a new req_pool_idx. self.req_to_token_pool.free(self.current_inflight_req.req_pool_idx) self.batch_is_full = False if not self.last_batch.is_empty(): @@ -489,9 +490,7 @@ def get_next_batch_to_run(self): def get_new_batch_prefill(self) -> Optional[ScheduleBatch]: # Handle the cases where prefill is not allowed - if ( - self.batch_is_full or len(self.waiting_queue) == 0 - ) and self.current_inflight_req is None: + if self.batch_is_full or len(self.waiting_queue) == 0: return None running_bs = len(self.running_batch.reqs) if self.running_batch else 0