From 09a76266622610cecdf40705619f3a492aabe703 Mon Sep 17 00:00:00 2001
From: Jim Crist-Harif
Date: Mon, 16 Oct 2023 16:18:56 -0500
Subject: [PATCH] feat(dask): support `Table.sample`

---
 ibis/backends/dask/execution/generic.py | 5 +++++
 ibis/backends/tests/test_generic.py     | 3 ---
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/ibis/backends/dask/execution/generic.py b/ibis/backends/dask/execution/generic.py
index ce75ca62f0e9..34c56b592e4e 100644
--- a/ibis/backends/dask/execution/generic.py
+++ b/ibis/backends/dask/execution/generic.py
@@ -552,3 +552,8 @@ def execute_table_array_view(op, _, **kwargs):
     # Need to compute dataframe in order to squeeze into a scalar
     ddf = execute(op.table)
     return ddf.compute().squeeze()
+
+
+@execute_node.register(ops.Sample, dd.DataFrame, object, object)
+def execute_sample(op, data, fraction, seed, **kwargs):
+    return data.sample(frac=fraction, random_state=seed)
diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py
index aa327a20677c..f7f6d5f92c0d 100644
--- a/ibis/backends/tests/test_generic.py
+++ b/ibis/backends/tests/test_generic.py
@@ -1532,7 +1532,6 @@ def test_dynamic_table_slice_with_computed_offset(backend):
 @pytest.mark.notimpl(
     [
         "bigquery",
-        "dask",
         "datafusion",
         "druid",
         "flink",
@@ -1560,7 +1559,6 @@ def test_sample(backend):
 @pytest.mark.notimpl(
     [
         "bigquery",
-        "dask",
         "datafusion",
         "druid",
         "flink",
@@ -1581,7 +1579,6 @@ def test_sample_memtable(con, backend):
     [
         "bigquery",
         "clickhouse",
-        "dask",
         "datafusion",
         "druid",
         "flink",