From 048fffcb718f640c6e8aa9c7b9fedc7f58d5886b Mon Sep 17 00:00:00 2001 From: Alexander Matveev Date: Thu, 12 Sep 2024 13:40:46 +0000 Subject: [PATCH 1/3] [Bugfix] Fix async log stats --- tests/basic_correctness/test_preemption.py | 1 + vllm/engine/llm_engine.py | 16 ++++++++++++---- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/tests/basic_correctness/test_preemption.py b/tests/basic_correctness/test_preemption.py index 7e77037da07d3..50d399bef1878 100644 --- a/tests/basic_correctness/test_preemption.py +++ b/tests/basic_correctness/test_preemption.py @@ -64,6 +64,7 @@ def test_chunked_prefill_recompute( enable_chunked_prefill=enable_chunked_prefill, max_num_seqs=max_num_seqs, worker_use_ray=worker_use_ray, + disable_log_stats=False, ) as vllm_model: vllm_outputs = vllm_model.generate_greedy(example_prompts, max_tokens) assert (vllm_model.model.llm_engine.scheduler[0].artificial_preempt_cnt diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py index 92e46c7af5162..4015da26f14e4 100644 --- a/vllm/engine/llm_engine.py +++ b/vllm/engine/llm_engine.py @@ -1435,7 +1435,8 @@ def _process_model_outputs(self, # LLMEngine/AsyncLLMEngine directly if is_async: # Log stats. - self.do_log_stats(scheduler_outputs, outputs, finished_before) + self.do_log_stats(scheduler_outputs, outputs, finished_before, + skip) # Tracing self.do_tracing(scheduler_outputs) @@ -1742,18 +1743,20 @@ def remove_logger(self, logger_name: str) -> None: def do_log_stats(self, scheduler_outputs: Optional[SchedulerOutputs] = None, model_output: Optional[List[SamplerOutput]] = None, - finished_before: Optional[List[int]] = None) -> None: + finished_before: Optional[List[int]] = None, + skip: Optional[List[int]] = None) -> None: """Forced log when no requests active.""" if self.log_stats: stats = self._get_stats(scheduler_outputs, model_output, - finished_before) + finished_before, skip) for logger in self.stat_loggers.values(): logger.log(stats) def _get_stats(self, scheduler_outputs: Optional[SchedulerOutputs], model_output: Optional[List[SamplerOutput]] = None, - finished_before: Optional[List[int]] = None) -> Stats: + finished_before: Optional[List[int]] = None, + skip: Optional[List[int]] = None) -> Stats: """Get Stats to be Logged to Prometheus. Args: @@ -1835,6 +1838,11 @@ def _get_stats(self, actual_num_batched_tokens -= 1 continue + # Currently, skip == preempted sequences, so we need to skip + # their log stats + if skip and idx in skip: + continue + group_was_prefill = idx < scheduler_outputs.num_prefill_groups seq_group = scheduled_seq_group.seq_group From 6f23d75d0f54bc11333d6c318704909e40f9cd78 Mon Sep 17 00:00:00 2001 From: Alexander Matveev Date: Thu, 12 Sep 2024 15:44:27 +0000 Subject: [PATCH 2/3] Cody's comments --- vllm/engine/llm_engine.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py index 4015da26f14e4..c63a79273558d 100644 --- a/vllm/engine/llm_engine.py +++ b/vllm/engine/llm_engine.py @@ -1764,6 +1764,10 @@ def _get_stats(self, the scheduled batch, model_output: Optional, used to emit speculative decoding metrics which are created by the workers. + finished_before: Optional, indices of sequences that were finished + already before. These sequences will be ignored. + skip: Optional, indices of sequences that were preempted. These + sequences will be ignored. """ now = time.time() From 8169fb7e750ee2469a229888b46c789f6a5d1052 Mon Sep 17 00:00:00 2001 From: Alexander Matveev Date: Thu, 12 Sep 2024 18:50:31 +0000 Subject: [PATCH 3/3] ping --- vllm/engine/llm_engine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py index c63a79273558d..1d18af2501337 100644 --- a/vllm/engine/llm_engine.py +++ b/vllm/engine/llm_engine.py @@ -1765,7 +1765,7 @@ def _get_stats(self, model_output: Optional, used to emit speculative decoding metrics which are created by the workers. finished_before: Optional, indices of sequences that were finished - already before. These sequences will be ignored. + before. These sequences will be ignored. skip: Optional, indices of sequences that were preempted. These sequences will be ignored. """