From 6aa897e86892cc8fdae5ac60e9491a0973ef791b Mon Sep 17 00:00:00 2001
From: Jim Crist-Harif
Date: Mon, 16 Oct 2023 16:31:56 -0500
Subject: [PATCH] feat(pyspark): support `Table.sample`

---
Review notes (this region, between `---` and the first `diff --git`, is
discarded by `git am` and is not part of the commit message):

* ibis/backends/pyspark/compiler.py: registers `compile_sample` as the
  handler for `ops.Sample` via `@compiles`. It translates `op.table`
  (forwarding `**kwargs`) and returns the result of calling `.sample()` on
  it with `fraction=op.fraction` and `seed=op.seed`.
  NOTE(review): only `fraction` and `seed` are forwarded; if `ops.Sample`
  carries other fields (e.g. a sampling method) they are ignored here --
  confirm that is intended.
* ibis/backends/tests/test_generic.py: removes "pyspark" from three lists of
  backend names. Per the hunk headers these lists follow
  `test_dynamic_table_slice_with_computed_offset`, `test_sample` and
  `test_sample_memtable` respectively; presumably they are
  "not supported" marker lists -- verify against the full test file.
* NOTE(review): line breaks and indentation in this patch were restored from
  a copy that had been collapsed onto a single line. Verify whitespace
  against the upstream commit before applying.

 ibis/backends/pyspark/compiler.py   | 6 ++++++
 ibis/backends/tests/test_generic.py | 3 ---
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/ibis/backends/pyspark/compiler.py b/ibis/backends/pyspark/compiler.py
index 40a82411d13f..06f90be0661f 100644
--- a/ibis/backends/pyspark/compiler.py
+++ b/ibis/backends/pyspark/compiler.py
@@ -300,6 +300,12 @@ def compile_limit(t, op, **kwargs):
     return df
 
 
+@compiles(ops.Sample)
+def compile_sample(t, op, **kwargs):
+    df = t.translate(op.table, **kwargs)
+    return df.sample(fraction=op.fraction, seed=op.seed)
+
+
 @compiles(ops.And)
 def compile_and(t, op, **kwargs):
     return t.translate(op.left, **kwargs) & t.translate(op.right, **kwargs)
diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py
index f7f6d5f92c0d..bdc5ae95f5ca 100644
--- a/ibis/backends/tests/test_generic.py
+++ b/ibis/backends/tests/test_generic.py
@@ -1537,7 +1537,6 @@ def test_dynamic_table_slice_with_computed_offset(backend):
         "flink",
         "impala",
         "polars",
-        "pyspark",
         "snowflake",
     ]
 )
@@ -1564,7 +1563,6 @@ def test_sample(backend):
         "flink",
         "impala",
         "polars",
-        "pyspark",
         "snowflake",
     ]
 )
@@ -1588,7 +1586,6 @@ def test_sample_memtable(con, backend):
         "oracle",
         "polars",
         "postgres",
-        "pyspark",
         "snowflake",
         "sqlite",
         "trino",