From db29e10fe7fd26ef8e9024686fef38fc137546b0 Mon Sep 17 00:00:00 2001
From: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
Date: Mon, 27 Mar 2023 08:02:05 -0400
Subject: [PATCH] feat(pyspark): implement count distinct

---
 ibis/backends/pyspark/compiler.py       | 5 +++++
 ibis/backends/tests/test_aggregation.py | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/ibis/backends/pyspark/compiler.py b/ibis/backends/pyspark/compiler.py
index 4fc38054477d..320c9f150432 100644
--- a/ibis/backends/pyspark/compiler.py
+++ b/ibis/backends/pyspark/compiler.py
@@ -579,6 +579,11 @@ def compile_count(t, op, **kwargs):
     return compile_aggregator(t, op, fn=F.count, **kwargs)
 
 
+@compiles(ops.CountDistinct)
+def compile_count_distinct(t, op, **kwargs):
+    return compile_aggregator(t, op, fn=F.count_distinct, **kwargs)
+
+
 @compiles(ops.CountStar)
 def compile_count_star(t, op, aggcontext=None, **kwargs):
     src_table = t.translate(op.arg, **kwargs)
diff --git a/ibis/backends/tests/test_aggregation.py b/ibis/backends/tests/test_aggregation.py
index 9e86bb7e2fb0..dff38abd3732 100644
--- a/ibis/backends/tests/test_aggregation.py
+++ b/ibis/backends/tests/test_aggregation.py
@@ -282,7 +282,7 @@ def mean_and_std(v):
             lambda t, where: t.bool_col[where].dropna().nunique(),
             id='nunique',
             marks=pytest.mark.notimpl(
-                ["pyspark", "datafusion"], raises=com.OperationNotDefinedError
+                ["datafusion"], raises=com.OperationNotDefinedError
             ),
         ),
         param(