pytorch · joecummings · Sep 20, 2024 · Sep 20, 2024 · Sep 20, 2024 · Sep 20, 2024
diff --git a/tests/torchtune/generation/test_generation.py b/tests/torchtune/generation/test_generation.py
@@ -221,10 +221,7 @@ def test_sample_consistency(self):
         # set all probabilities except for token_id=100 to 0
         logits = torch.zeros(2000)
         logits[100] = 1
-        q = torch.empty(
-            (1, 2000),
-        ).exponential_(1)
-        token = sample(logits, q, temperature=1, top_k=1)
+        token = sample(logits, top_k=1)
         assert token.item() == 100
 
     @pytest.mark.parametrize(

diff --git a/torchtune/generation/_generation.py b/torchtune/generation/_generation.py
@@ -17,18 +17,28 @@ def multinomial_sample_one(probs: torch.Tensor, q: torch.Tensor) -> torch.Tensor
 
 def sample(
     logits: torch.Tensor,
-    q: torch.Tensor,
     *,
     temperature: float = 1.0,
     top_k: Optional[int] = None,
+    q: Optional[torch.Tensor] = None,
 ) -> torch.Tensor:
-    """Generic sample from a probability distribution.
+    """Generic sample from a probability distribution. Includes support for Top-K sampling
+    and Temperature.
 
     Args:
         logits (torch.Tensor): logits from which to sample
-        q (torch.Tensor): randomly sampled tensor for softmax sampling trick.
         temperature (float): value to scale the predicted logits by, default 1.0.
         top_k (Optional[int]): If specified, we prune the sampling to only token ids within the top_k probabilities
+        q (Optional[torch.Tensor]): randomly sampled tensor for softmax sampling trick. If None,
+            we use the default softmax sampling trick. Default None.
+
+    Example:
+        >>> from torchtune.generation import sample
+        >>> logits = torch.empty(3, 3).uniform_(0, 1)
+        >>> sample(logits)
+        tensor([[1],
+                [2],
+                [0]], dtype=torch.int32)
 
     Returns:
         torch.Tensor: sampled token id
@@ -42,8 +52,14 @@ def sample(
         # set everything smaller than pivot value to inf since these
         # should be pruned
         logits = torch.where(logits < pivot, -float("Inf"), logits)
+
     # change logits into probabilities
     probs = torch.nn.functional.softmax(logits, dim=-1)
+
+    # if q is None, we use the default softmax sampling trick
+    if q is None:
+        q = torch.empty_like(probs).exponential_(1)
+
     return multinomial_sample_one(probs, q)
 
 
@@ -85,7 +101,7 @@ def generate_next_token(
     # we want to take the last token's logits as the input to the next model call
     logits = model(x, input_pos=input_pos, mask=mask)
     return (
-        sample(logits[:, -1].clone(), q, temperature=temperature, top_k=top_k),
+        sample(logits[:, -1].clone(), temperature=temperature, top_k=top_k, q=q),
         logits,
     )