Skip to content

Commit

Permalink
feat(api): move analytic window functions to top-level
Browse files Browse the repository at this point in the history
cpcloud authored and kszucs committed Oct 12, 2023
1 parent 1e40d4e commit 8f2ced1
Showing 4 changed files with 142 additions and 25 deletions.
8 changes: 4 additions & 4 deletions ibis/backends/base/sql/alchemy/registry.py
Original file line number Diff line number Diff line change
@@ -675,10 +675,10 @@ class array_filter(FunctionElement):
ops.FirstValue: unary(sa.func.first_value),
ops.LastValue: unary(sa.func.last_value),
ops.RowNumber: fixed_arity(sa.func.row_number, 0),
ops.DenseRank: unary(lambda _: sa.func.dense_rank()),
ops.MinRank: unary(lambda _: sa.func.rank()),
ops.PercentRank: unary(lambda _: sa.func.percent_rank()),
ops.CumeDist: unary(lambda _: sa.func.cume_dist()),
ops.DenseRank: fixed_arity(sa.func.dense_rank, 0),
ops.MinRank: fixed_arity(sa.func.rank, 0),
ops.PercentRank: fixed_arity(sa.func.percent_rank, 0),
ops.CumeDist: fixed_arity(sa.func.cume_dist, 0),
ops.NthValue: _nth_value,
ops.WindowFunction: _window_function,
}
92 changes: 92 additions & 0 deletions ibis/expr/api.py
Original file line number Diff line number Diff line change
@@ -57,6 +57,11 @@
"connect",
"cross_join",
"cumulative_window",
"cume_dist",
"rank",
"ntile",
"dense_rank",
"percent_rank",
"date",
"desc",
"decompile",
@@ -1021,6 +1026,93 @@ def now() -> ir.TimestampScalar:
return ops.TimestampNow().to_expr()


def rank() -> ir.IntegerColumn:
    """Build a rank expression that leaves gaps, like SQL's `RANK()`.

    Rows that tie on the ordering key all receive the position of the first
    row in their group, so the next distinct value jumps ahead by the size
    of the tie. Positions start at zero.

    Returns
    -------
    IntegerColumn
        The min rank

    Examples
    --------
    >>> import ibis
    >>> ibis.options.interactive = True
    >>> t = ibis.memtable({"values": [1, 2, 1, 2, 3, 2]})
    >>> t.mutate(rank=ibis.rank().over(order_by=t.values))
    ┏━━━━━━━━┳━━━━━━━┓
    ┃ values ┃ rank  ┃
    ┡━━━━━━━━╇━━━━━━━┩
    │ int64  │ int64 │
    ├────────┼───────┤
    │      1 │     0 │
    │      1 │     0 │
    │      2 │     2 │
    │      2 │     2 │
    │      2 │     2 │
    │      3 │     5 │
    └────────┴───────┘
    """
    # The operation is created without arguments; the ordering is attached
    # later by the caller through `.over(...)`, as in the example above.
    min_rank_op = ops.MinRank()
    return min_rank_op.to_expr()


def dense_rank() -> ir.IntegerColumn:
    """Build a gap-free rank expression, like SQL's `DENSE_RANK()`.

    Rows that tie on the ordering key share one position, and each new
    distinct value receives the next consecutive integer. Positions start
    at zero.

    Returns
    -------
    IntegerColumn
        The rank

    Examples
    --------
    >>> import ibis
    >>> ibis.options.interactive = True
    >>> t = ibis.memtable({"values": [1, 2, 1, 2, 3, 2]})
    >>> t.mutate(rank=ibis.dense_rank().over(order_by=t.values))
    ┏━━━━━━━━┳━━━━━━━┓
    ┃ values ┃ rank  ┃
    ┡━━━━━━━━╇━━━━━━━┩
    │ int64  │ int64 │
    ├────────┼───────┤
    │      1 │     0 │
    │      1 │     0 │
    │      2 │     1 │
    │      2 │     1 │
    │      2 │     1 │
    │      3 │     2 │
    └────────┴───────┘
    """
    # The operation is created without arguments; the ordering is attached
    # later by the caller through `.over(...)`, as in the example above.
    dense_rank_op = ops.DenseRank()
    return dense_rank_op.to_expr()


def percent_rank() -> ir.FloatingColumn:
    """Return the relative rank of rows over a window.

    The expression is built without a column argument; callers supply the
    ordering afterwards, e.g. `ibis.percent_rank().over(order_by=...)`.
    """
    percent_rank_op = ops.PercentRank()
    return percent_rank_op.to_expr()


def cume_dist() -> ir.FloatingColumn:
    """Return the cumulative distribution of rows over a window.

    The expression is built without a column argument; callers supply the
    ordering afterwards, e.g. `ibis.cume_dist().over(order_by=...)`.
    """
    cume_dist_op = ops.CumeDist()
    return cume_dist_op.to_expr()


def ntile(buckets: int | ir.IntegerValue) -> ir.IntegerColumn:
    """Return the bucket number each row falls into when rows are partitioned.

    The expression is built from the bucket count alone; callers supply the
    ordering afterwards, e.g. `ibis.ntile(4).over(order_by=...)`.

    Parameters
    ----------
    buckets
        Number of buckets to partition into
    """
    ntile_op = ops.NTile(buckets)
    return ntile_op.to_expr()


def row_number() -> ir.IntegerColumn:
"""Return an analytic function expression for the current row number.
9 changes: 2 additions & 7 deletions ibis/expr/operations/analytic.py
Original file line number Diff line number Diff line change
@@ -45,12 +45,12 @@ class RankBase(Analytic):

@public
class MinRank(RankBase):
# Operation node behind the top-level `rank()` helper, which constructs it
# with no arguments (`ops.MinRank()`).
# NOTE(review): this diff hunk carries no +/- markers. `arg` looks like the
# removed field and `pass` its replacement -- confirm against the commit.
arg: Column[dt.Any]
pass


@public
class DenseRank(RankBase):
# Operation node behind the top-level `dense_rank()` helper, which constructs
# it with no arguments (`ops.DenseRank()`).
# NOTE(review): this diff hunk carries no +/- markers. `arg` looks like the
# removed field and `pass` its replacement -- confirm against the commit.
arg: Column[dt.Any]
pass


@public
@@ -76,21 +76,16 @@ class RowNumber(RankBase):

@public
class PercentRank(Analytic):
# Analytic operation whose result dtype is fixed to double.
# NOTE(review): this diff hunk carries no +/- markers. `arg` looks like a
# removed field (the top-level helper calls `ops.PercentRank()` with no
# arguments) -- confirm against the commit.
arg: Column[dt.Any]

dtype = dt.double


@public
class CumeDist(Analytic):
# Analytic operation whose result dtype is fixed to double.
# NOTE(review): this diff hunk carries no +/- markers. `arg` looks like a
# removed field (the top-level helper calls `ops.CumeDist()` with no
# arguments) -- confirm against the commit.
arg: Column[dt.Any]

dtype = dt.double


@public
class NTile(Analytic):
arg: Column[dt.Any]
buckets: Scalar[dt.Integer]

dtype = dt.int64
58 changes: 44 additions & 14 deletions ibis/expr/types/generic.py
Original file line number Diff line number Diff line change
@@ -1765,7 +1765,13 @@ def rank(self) -> ir.IntegerColumn:
│ 3 │ 5 │
└────────┴───────┘
"""
return ops.MinRank(self).to_expr()
import ibis.expr.analysis as an

return (
ibis.rank()
.over(order_by=self)
.resolve(an.find_first_base_table(self.op()).to_expr())
)

def dense_rank(self) -> ir.IntegerColumn:
"""Position of first element within each group of equal values.
@@ -1798,15 +1804,49 @@ def dense_rank(self) -> ir.IntegerColumn:
│ 3 │ 2 │
└────────┴───────┘
"""
return ops.DenseRank(self).to_expr()
import ibis.expr.analysis as an

return (
ibis.dense_rank()
.over(order_by=self)
.resolve(an.find_first_base_table(self.op()).to_expr())
)

def percent_rank(self) -> Column:
"""Return the relative rank of the values in the column."""
# NOTE(review): this diff hunk carries no +/- markers. The return directly
# below looks like the pre-commit body, and the import plus the second return
# its replacement -- confirm against the commit before treating both as live.
return ops.PercentRank(self).to_expr()
# Imported inside the method rather than at module top.
import ibis.expr.analysis as an

# Build the argument-free top-level expression, order its window by this
# column, then resolve it against the table expression obtained from
# `an.find_first_base_table(self.op())`.
return (
ibis.percent_rank()
.over(order_by=self)
.resolve(an.find_first_base_table(self.op()).to_expr())
)

def cume_dist(self) -> Column:
"""Return the cumulative distribution over a window."""
# NOTE(review): this diff hunk carries no +/- markers. The return directly
# below looks like the pre-commit body, and the import plus the second return
# its replacement -- confirm against the commit before treating both as live.
return ops.CumeDist(self).to_expr()
# Imported inside the method rather than at module top.
import ibis.expr.analysis as an

# Build the argument-free top-level expression, order its window by this
# column, then resolve it against the table expression obtained from
# `an.find_first_base_table(self.op())`.
return (
ibis.cume_dist()
.over(order_by=self)
.resolve(an.find_first_base_table(self.op()).to_expr())
)

def ntile(self, buckets: int | ir.IntegerValue) -> ir.IntegerColumn:
    """Return the bucket number each value falls into, ordered by this column.

    Delegates to the top-level `ibis.ntile`, using this column as the
    window ordering.

    Parameters
    ----------
    buckets
        Number of buckets to partition into
    """
    import ibis.expr.analysis as an

    # Same evaluation order as the chained form: build the windowed
    # expression first, then look up the table to resolve it against.
    windowed = ibis.ntile(buckets).over(order_by=self)
    base_table = an.find_first_base_table(self.op()).to_expr()
    return windowed.resolve(base_table)

def cummin(self, *, where=None, group_by=None, order_by=None) -> Column:
"""Return the cumulative min over a window."""
@@ -1852,16 +1892,6 @@ def lead(
"""
return ops.Lead(self, offset, default).to_expr()

def ntile(self, buckets: int | ir.IntegerValue) -> ir.IntegerColumn:
"""Return the integer number of a partitioning of the column values.
Parameters
----------
buckets
Number of buckets to partition into
"""
# NOTE(review): presumably the pre-commit definition that this commit deletes;
# a same-signature `ntile` built on `ibis.ntile(buckets)` appears earlier in
# this diff. Confirm against the commit before treating both as live.
return ops.NTile(self, buckets).to_expr()

def nth(self, n: int | ir.IntegerValue) -> Column:
"""Return the `n`th value (0-indexed) over a window.

0 comments on commit 8f2ced1

Please sign in to comment.