diff --git a/ibis/expr/operations/relations.py b/ibis/expr/operations/relations.py index 3f06fabb3dbb..84266b968858 100644 --- a/ibis/expr/operations/relations.py +++ b/ibis/expr/operations/relations.py @@ -586,7 +586,7 @@ class Sample(Relation): table: Relation fraction: Annotated[float, Between(0, 1)] - method: Literal["row", "block"] = "row" + method: Literal["row", "block"] seed: UnionType[int, None] = None @attribute diff --git a/ibis/expr/types/relations.py b/ibis/expr/types/relations.py index f183037b0ae2..df12ffe735db 100644 --- a/ibis/expr/types/relations.py +++ b/ibis/expr/types/relations.py @@ -1200,6 +1200,20 @@ def sample( ) -> Table: """Sample a fraction of rows from a table. + ::: {.callout-note} + ## Results may be non-repeatable + + Sampling is by definition a random operation. Some backends support + specifying a `seed` for repeatable results, but not all backends + support that option. Some backends (DuckDB, for example) do support + specifying a `seed` but may still not have repeatable results in all + cases. + + In all cases, results are backend-specific. An execution against one + backend is unlikely to sample the same rows when executed against a + different backend, even with the same `seed` set. + ::: + Parameters ---------- fraction