From 26d851624bcae7160f84ee0bba88a3aa7853e110 Mon Sep 17 00:00:00 2001 From: Jim Crist-Harif Date: Fri, 16 Aug 2024 15:14:18 -0500 Subject: [PATCH] feat(pyspark): support `quantile` --- ibis/backends/sql/compilers/pyspark.py | 7 +++++++ ibis/backends/tests/test_aggregation.py | 15 +++++---------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/ibis/backends/sql/compilers/pyspark.py b/ibis/backends/sql/compilers/pyspark.py index 5b7588ea2762..7c5eed5c6885 100644 --- a/ibis/backends/sql/compilers/pyspark.py +++ b/ibis/backends/sql/compilers/pyspark.py @@ -283,6 +283,13 @@ def visit_GroupConcat(self, op, *, arg, sep, where, order_by): collected = self.if_(self.f.size(collected).eq(0), NULL, collected) return self.f.array_join(collected, sep) + def visit_Quantile(self, op, *, arg, quantile, where): + if where is not None: + arg = self.if_(where, arg, NULL) + return self.f.percentile(arg, quantile) + + visit_MultiQuantile = visit_Quantile + def visit_Correlation(self, op, *, left, right, how, where): if (left_type := op.left.dtype).is_boolean(): left = self.cast(left, dt.Int32(nullable=left_type.nullable)) diff --git a/ibis/backends/tests/test_aggregation.py b/ibis/backends/tests/test_aggregation.py index e99c102d36ac..cb128a8921bd 100644 --- a/ibis/backends/tests/test_aggregation.py +++ b/ibis/backends/tests/test_aggregation.py @@ -864,11 +864,6 @@ def test_count_distinct_star(alltypes, df, ibis_cond, pandas_cond): reason="backend implements approximate quantiles", raises=com.OperationNotDefinedError, ), - pytest.mark.never( - ["pyspark"], - reason="backend implements approximate quantiles", - raises=AssertionError, - ), pytest.mark.never( ["flink"], reason="backend doesn't implement approximate quantiles yet", @@ -905,11 +900,6 @@ def test_count_distinct_star(alltypes, df, ibis_cond, pandas_cond): reason="backend implements approximate quantiles", raises=com.OperationNotDefinedError, ), - pytest.mark.never( - ["pyspark"], - reason="backend implements approximate quantiles", - raises=AssertionError, - ), pytest.mark.never( ["dask"], reason="backend implements approximate quantiles", @@ -1277,6 +1267,11 @@ def test_string_quantile(alltypes, func): raises=SnowflakeProgrammingError, reason="doesn't support median of dates", ) +@pytest.mark.notyet( + ["pyspark"], + raises=PySparkAnalysisException, + reason="doesn't support quantile on dates", +) @pytest.mark.notimpl(["dask"], raises=(AssertionError, NotImplementedError, TypeError)) @pytest.mark.notyet(["datafusion"], raises=Exception, reason="not supported upstream") @pytest.mark.notyet(