Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: Implementing rowid #2345

Merged
merged 7 commits into from
Sep 1, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -397,6 +397,7 @@ Table methods
TableExpr.mutate
TableExpr.projection
TableExpr.relabel
TableExpr.rowid
TableExpr.schema
TableExpr.set_column
TableExpr.sort_by
Expand Down
1 change: 1 addition & 0 deletions docs/source/release/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ Release Notes
These release notes are for versions of ibis **1.0 and later**. Release
notes for pre-1.0 versions of ibis can be found at :doc:`release-pre-1.0`

* :feature:`2251` Add ``rowid`` expression, supported by SQLite and OmniSciDB
* :feature:`2230` Add intersection to general ibis api
* :support:`2304` Update ``google-cloud-bigquery`` dependency minimum version to 1.12.0
* :feature:`2303` Add ``application_name`` argument to ``ibis.bigquery.connect`` to allow attributing Google API requests to projects that use Ibis.
Expand Down
27 changes: 27 additions & 0 deletions ibis/expr/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -4278,6 +4278,32 @@ def _table_drop(self, fields):
return self[[field for field in schema if field not in field_set]]


def _rowid(self):
    """
    Return an auto-incrementing integer column numbering the result rows.

    Numbering starts at 0 or 1 depending on the backend, so check the
    backend documentation before relying on the first value.

    Note that this is different from the window function row number
    (even if they are conceptually the same), and different from row
    id in backends where it represents the physical location (e.g. Oracle
    or PostgreSQL's ctid).

    Returns
    -------
    ir.IntegerColumn

    Examples
    --------
    >>> my_table[my_table.rowid(), my_table.name].execute()  # doctest: +SKIP
    1|Ibis
    2|pandas
    3|Dask
    """
    # The operation takes no arguments; ``self`` is only the method receiver.
    row_id_op = ops.RowID()
    return row_id_op.to_expr()


_table_methods = dict(
aggregate=aggregate,
count=_table_count,
Expand Down Expand Up @@ -4308,6 +4334,7 @@ def _table_drop(self, fields):
union=_table_union,
intersect=_table_intersect,
view=_table_view,
rowid=_rowid,
)


Expand Down
13 changes: 13 additions & 0 deletions ibis/expr/operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,19 @@ def _make_expr(self):
return klass(self, name=self.name)


class RowID(ValueOp):
    """The row number (an autonumeric) of the returned result.

    The operation takes no arguments, always yields an int64 column and
    names itself ``rowid``.
    """

    def output_type(self):
        """Return the column expression type for 64-bit integers."""
        int64_dtype = dt.int64
        return int64_dtype.column_type()

    def resolve_name(self):
        """Return the fixed name of this operation, ``'rowid'``."""
        return 'rowid'

    def has_resolved_name(self):
        """Return ``True``: the name is always available."""
        return True


def find_all_base_tables(expr, memo=None):
if memo is None:
memo = {}
Expand Down
1 change: 1 addition & 0 deletions ibis/omniscidb/operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -1085,6 +1085,7 @@ def formatter(translator, expr):
ops.IsNan: unary('isNan'),
ops.NullIfZero: _nullifzero,
ops.ZeroIfNull: _zeroifnull,
ops.RowID: lambda *args: 'rowid',
}

# WINDOW
Expand Down
7 changes: 7 additions & 0 deletions ibis/pandas/execution/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1066,3 +1066,10 @@ def execute_simple_case_series(op, value, whens, thens, otherwise, **kwargs):
@execute_node.register(ops.Distinct, pd.DataFrame)
def execute_distinct_dataframe(op, df, **kwargs):
    """Execute ``ops.Distinct`` on a DataFrame by dropping duplicate rows.

    ``op`` and ``kwargs`` are accepted for the dispatch signature but are
    not used here.
    """
    deduplicated = df.drop_duplicates()
    return deduplicated


@execute_node.register(ops.RowID)
def execute_rowid(op, *args, **kwargs):
    """Reject ``ops.RowID`` for this backend.

    Raises
    ------
    com.UnsupportedOperationError
        Always; the handler never returns a value.
    """
    message = 'rowid is not supported in pandas backends'
    raise com.UnsupportedOperationError(message)
1 change: 1 addition & 0 deletions ibis/sql/sqlite/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,7 @@ def _rpad(t, expr):
ops.StandardDev: toolz.compose(
sa.func._ibis_sqlite_sqrt, _variance_reduction('_ibis_sqlite_var')
),
ops.RowID: lambda t, expr: sa.literal_column('rowid'),
}
)

Expand Down
31 changes: 31 additions & 0 deletions ibis/tests/all/test_column.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
import numpy as np
import pandas as pd
import pytest


# Names of backends whose ``rowid`` numbering starts at 0; the rowid tests
# in this module expect numbering to start at 1 for every other backend.
ROWID_ZERO_INDEXED_BACKENDS = ('omniscidb',)


@pytest.mark.parametrize(
'column',
[
Expand All @@ -16,3 +21,29 @@ def test_distinct_column(backend, alltypes, df, column):
result = expr.execute()
expected = df[column].unique()
assert set(result) == set(expected)


@pytest.mark.xfail_unsupported
def test_rowid(con, backend):
    """Check that ``rowid()`` yields consecutive int64 values named 'rowid'.

    The expected first value is 0 for backends listed in
    ``ROWID_ZERO_INDEXED_BACKENDS`` and 1 otherwise.
    """
    table = con.table('functional_alltypes')
    frame = table[table.rowid()].execute()

    if backend.name in ROWID_ZERO_INDEXED_BACKENDS:
        start = 0
    else:
        start = 1
    stop = start + len(frame)
    expected = pd.Series(range(start, stop), dtype=np.int64, name='rowid')

    # The projection has a single column, so compare it positionally.
    actual = frame.iloc[:, 0]
    pd.testing.assert_series_equal(actual, expected)


@pytest.mark.xfail_unsupported
def test_named_rowid(con, backend):
    """Check that a renamed ``rowid()`` keeps its values and new name.

    The expected first value is 0 for backends listed in
    ``ROWID_ZERO_INDEXED_BACKENDS`` and 1 otherwise; the resulting column
    must be called 'number'.
    """
    table = con.table('functional_alltypes')
    renamed = table.rowid().name('number')
    frame = table[renamed].execute()

    if backend.name in ROWID_ZERO_INDEXED_BACKENDS:
        start = 0
    else:
        start = 1
    stop = start + len(frame)
    expected = pd.Series(range(start, stop), dtype=np.int64, name='number')

    # The projection has a single column, so compare it positionally.
    actual = frame.iloc[:, 0]
    pd.testing.assert_series_equal(actual, expected)