Skip to content

Commit

Permalink
feat(pyspark): support quantile
Browse files Browse the repository at this point in the history
  • Loading branch information
jcrist committed Aug 16, 2024
1 parent 98af798 commit 26d8516
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 10 deletions.
7 changes: 7 additions & 0 deletions ibis/backends/sql/compilers/pyspark.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,13 @@ def visit_GroupConcat(self, op, *, arg, sep, where, order_by):
collected = self.if_(self.f.size(collected).eq(0), NULL, collected)
return self.f.array_join(collected, sep)

def visit_Quantile(self, op, *, arg, quantile, where):
    """Compile a quantile aggregation into a ``percentile`` call.

    ``arg`` is the expression being aggregated and ``quantile`` the
    requested quantile expression. When ``where`` is given, ``arg`` is
    wrapped as ``if_(where, arg, NULL)`` before aggregating; presumably
    the aggregate then skips the NULLed rows -- confirm against the
    backend's NULL handling. ``op`` is not used by this implementation.
    """
    filtered = arg if where is None else self.if_(where, arg, NULL)
    return self.f.percentile(filtered, quantile)

# MultiQuantile is compiled by the same method as Quantile.
# NOTE(review): presumably valid because `percentile` also accepts an
# array of percentages -- confirm against the Spark SQL function reference.
visit_MultiQuantile = visit_Quantile

def visit_Correlation(self, op, *, left, right, how, where):
if (left_type := op.left.dtype).is_boolean():
left = self.cast(left, dt.Int32(nullable=left_type.nullable))
Expand Down
15 changes: 5 additions & 10 deletions ibis/backends/tests/test_aggregation.py
Original file line number Diff line number Diff line change
Expand Up @@ -864,11 +864,6 @@ def test_count_distinct_star(alltypes, df, ibis_cond, pandas_cond):
reason="backend implements approximate quantiles",
raises=com.OperationNotDefinedError,
),
pytest.mark.never(
["pyspark"],
reason="backend implements approximate quantiles",
raises=AssertionError,
),
pytest.mark.never(
["flink"],
reason="backend doesn't implement approximate quantiles yet",
Expand Down Expand Up @@ -905,11 +900,6 @@ def test_count_distinct_star(alltypes, df, ibis_cond, pandas_cond):
reason="backend implements approximate quantiles",
raises=com.OperationNotDefinedError,
),
pytest.mark.never(
["pyspark"],
reason="backend implements approximate quantiles",
raises=AssertionError,
),
pytest.mark.never(
["dask"],
reason="backend implements approximate quantiles",
Expand Down Expand Up @@ -1277,6 +1267,11 @@ def test_string_quantile(alltypes, func):
raises=SnowflakeProgrammingError,
reason="doesn't support median of dates",
)
@pytest.mark.notyet(
["pyspark"],
raises=PySparkAnalysisException,
reason="doesn't support quantile on dates",
)
@pytest.mark.notimpl(["dask"], raises=(AssertionError, NotImplementedError, TypeError))
@pytest.mark.notyet(["datafusion"], raises=Exception, reason="not supported upstream")
@pytest.mark.notyet(
Expand Down

0 comments on commit 26d8516

Please sign in to comment.