Skip to content

Commit

Permalink
feat(api): add cume_dist operation
Browse files Browse the repository at this point in the history
I didn't get this working with PySpark but it seems like the rest are
doing ok.
  • Loading branch information
gforsyth authored and cpcloud committed Jul 15, 2022
1 parent b4b8a42 commit 6b6b185
Show file tree
Hide file tree
Showing 9 changed files with 36 additions and 0 deletions.
3 changes: 3 additions & 0 deletions ibis/backends/base/sql/alchemy/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,7 @@ def _window(t, expr):
ops.MinRank,
ops.NTile,
ops.PercentRank,
ops.CumeDist,
)

if isinstance(window_op, ops.CumulativeOp):
Expand Down Expand Up @@ -389,6 +390,7 @@ def _window(t, expr):
ops.MinRank,
ops.NTile,
ops.PercentRank,
ops.CumeDist,
ops.RowNumber,
)

Expand Down Expand Up @@ -656,6 +658,7 @@ def translate(t, expr):
ops.DenseRank: unary(lambda _: sa.func.dense_rank()),
ops.MinRank: unary(lambda _: sa.func.rank()),
ops.PercentRank: unary(lambda _: sa.func.percent_rank()),
ops.CumeDist: unary(lambda _: sa.func.cume_dist()),
ops.NthValue: _nth_value,
ops.Window: _window,
ops.CumulativeOp: _window,
Expand Down
1 change: 1 addition & 0 deletions ibis/backends/base/sql/registry/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,7 @@ def hash(translator, expr):
ops.DenseRank: lambda *args: 'dense_rank()',
ops.MinRank: lambda *args: 'rank()',
ops.PercentRank: lambda *args: 'percent_rank()',
ops.CumeDist: lambda *args: 'cume_dist()',
ops.FirstValue: unary('first_value'),
ops.LastValue: unary('last_value'),
ops.Lag: window.shift_like('lag'),
Expand Down
2 changes: 2 additions & 0 deletions ibis/backends/base/sql/registry/window.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ def _foll(f: Optional[int]) -> str:
ops.MinRank,
ops.NTile,
ops.PercentRank,
ops.CumeDist,
ops.RowNumber,
)

Expand Down Expand Up @@ -250,6 +251,7 @@ def window(translator, expr):
ops.FirstValue,
ops.LastValue,
ops.PercentRank,
ops.CumeDist,
ops.NTile,
)

Expand Down
1 change: 1 addition & 0 deletions ibis/backends/clickhouse/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -884,6 +884,7 @@ def _day_of_week_index(translator, expr):
ops.DecimalPrecision,
ops.DecimalScale,
ops.BaseConvert,
ops.CumeDist,
ops.CumulativeSum,
ops.CumulativeMin,
ops.CumulativeMax,
Expand Down
5 changes: 5 additions & 0 deletions ibis/backends/pandas/execution/window.py
Original file line number Diff line number Diff line change
Expand Up @@ -581,3 +581,8 @@ def execute_series_group_by_percent_rank(op, data, **kwargs):
def execute_series_percent_rank(op, data, **kwargs):
# TODO(phillipc): Handle ORDER BY
return data.rank(method="min", ascending=True).sub(1).div(len(data) - 1)


@execute_node.register(ops.CumeDist, (pd.Series, SeriesGroupBy))
def execute_series_group_by_cume_dist(op, data, **kwargs):
return data.rank(method='min', ascending=True, pct=True)
7 changes: 7 additions & 0 deletions ibis/backends/pyspark/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -1426,6 +1426,13 @@ def compile_percent_rank(t, expr, scope, timecontext, **kwargs):
return F.percent_rank()


@compiles(ops.CumeDist)
def compile_cume_dist(t, expr, scope, timecontext, **kwargs):
raise com.UnsupportedOperationError(
'PySpark backend does not support cume_dist with Ibis.'
)


@compiles(ops.NTile)
def compile_ntile(t, expr, scope, timecontext, **kwargs):
op = expr.op()
Expand Down
6 changes: 6 additions & 0 deletions ibis/backends/tests/test_window.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,12 @@ def calc_zscore(s):
).reset_index(drop=True, level=[0]),
id='percent_rank',
),
param(
lambda t, win: t.id.cume_dist().over(win),
lambda t: t.id.rank(method='min') / t.id.transform(len),
id='cume_dist',
marks=pytest.mark.notimpl(["pyspark"]),
),
param(
lambda t, win: t.float_col.ntile(buckets=7).over(win),
lambda t: t,
Expand Down
6 changes: 6 additions & 0 deletions ibis/expr/operations/analytic.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,12 @@ class PercentRank(Analytic):
output_dtype = dt.double


@public
class CumeDist(Analytic):
arg = rlz.column(rlz.any)
output_dtype = dt.double


@public
class NTile(Analytic):
arg = rlz.column(rlz.any)
Expand Down
5 changes: 5 additions & 0 deletions ibis/expr/types/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -799,6 +799,11 @@ def percent_rank(self) -> Column:

return ops.PercentRank(self).to_expr()

def cume_dist(self) -> Column:
import ibis.expr.operations as ops

return ops.CumeDist(self).to_expr()

def cummin(self) -> Column:
import ibis.expr.operations as ops

Expand Down

0 comments on commit 6b6b185

Please sign in to comment.