Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Bugfix] Fix async log stats #8417

Merged
merged 3 commits into from
Sep 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions tests/basic_correctness/test_preemption.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ def test_chunked_prefill_recompute(
enable_chunked_prefill=enable_chunked_prefill,
max_num_seqs=max_num_seqs,
worker_use_ray=worker_use_ray,
disable_log_stats=False,
) as vllm_model:
vllm_outputs = vllm_model.generate_greedy(example_prompts, max_tokens)
assert (vllm_model.model.llm_engine.scheduler[0].artificial_preempt_cnt
Expand Down
20 changes: 16 additions & 4 deletions vllm/engine/llm_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -1435,7 +1435,8 @@ def _process_model_outputs(self,
# LLMEngine/AsyncLLMEngine directly
if is_async:
# Log stats.
self.do_log_stats(scheduler_outputs, outputs, finished_before)
self.do_log_stats(scheduler_outputs, outputs, finished_before,
skip)

# Tracing
self.do_tracing(scheduler_outputs)
Expand Down Expand Up @@ -1742,25 +1743,31 @@ def remove_logger(self, logger_name: str) -> None:
def do_log_stats(self,
scheduler_outputs: Optional[SchedulerOutputs] = None,
model_output: Optional[List[SamplerOutput]] = None,
finished_before: Optional[List[int]] = None) -> None:
finished_before: Optional[List[int]] = None,
skip: Optional[List[int]] = None) -> None:
"""Forced log when no requests active."""
if self.log_stats:
stats = self._get_stats(scheduler_outputs, model_output,
finished_before)
finished_before, skip)
for logger in self.stat_loggers.values():
logger.log(stats)

def _get_stats(self,
scheduler_outputs: Optional[SchedulerOutputs],
model_output: Optional[List[SamplerOutput]] = None,
finished_before: Optional[List[int]] = None) -> Stats:
finished_before: Optional[List[int]] = None,
skip: Optional[List[int]] = None) -> Stats:
comaniac marked this conversation as resolved.
Show resolved Hide resolved
"""Get Stats to be Logged to Prometheus.

Args:
scheduler_outputs: Optional, used to populate metrics related to
the scheduled batch,
model_output: Optional, used to emit speculative decoding metrics
which are created by the workers.
finished_before: Optional, indices of sequences that were finished
before. These sequences will be ignored.
skip: Optional, indices of sequences that were preempted. These
sequences will be ignored.
"""
now = time.time()

Expand Down Expand Up @@ -1835,6 +1842,11 @@ def _get_stats(self,
actual_num_batched_tokens -= 1
continue

# Currently, skip == preempted sequences, so we need to skip
# their log stats
if skip and idx in skip:
continue

group_was_prefill = idx < scheduler_outputs.num_prefill_groups
seq_group = scheduled_seq_group.seq_group

Expand Down
Loading