Skip to content

Commit

Permalink
[BugFix] Lazily initialize _copy_stream to avoid torch initializing a CUDA context on the wrong GPU device (
Browse files Browse the repository at this point in the history
  • Loading branch information
lnykww authored and MengqingCao committed Sep 30, 2024
1 parent f1bc362 commit f845a12
Showing 1 changed file with 5 additions and 2 deletions.
7 changes: 5 additions & 2 deletions vllm/worker/multi_step_model_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,12 +230,15 @@ def __init__(self, base_model_runner: GPUModelRunnerBase, *args, **kwargs):
self._base_model_runner: GPUModelRunnerBase = base_model_runner

self.is_multi_step = self.scheduler_config.is_multi_step
# used to copy tensors from GPU to CPU asynchronously
self._copy_stream = torch.cuda.Stream()
self.pinned_sampled_token_ids: Optional[torch.Tensor] = None

self.pythonization_cache = PythonizationCache()

@functools.cached_property
def _copy_stream(self):
    """CUDA stream used for asynchronous GPU-to-CPU tensor copies.

    Exposed as a cached property so the stream is created lazily on
    first access (and then memoized) rather than eagerly in __init__;
    per the commit message, eager creation could make torch initialize
    a CUDA context on the wrong GPU device.
    """
    copy_stream = torch.cuda.Stream()
    return copy_stream

def make_model_input_from_broadcasted_tensor_dict(
self, tensor_dict: Dict[str, Any]) -> StatefulModelInput:
model_input = (StatefulModelInput.from_broadcasted_tensor_dict(
Expand Down

0 comments on commit f845a12

Please sign in to comment.