Skip to content

Commit

Permalink
feat(api): add relocate table expression API for moving columns aro…
Browse files Browse the repository at this point in the history
…und based on selectors
  • Loading branch information
cpcloud authored and gforsyth committed Aug 22, 2023
1 parent 2be1323 commit ee8a86f
Show file tree
Hide file tree
Showing 3 changed files with 295 additions and 2 deletions.
183 changes: 183 additions & 0 deletions ibis/expr/types/relations.py
Original file line number Diff line number Diff line change
Expand Up @@ -3609,6 +3609,189 @@ def pivot_wider(

return self.group_by(id_cols).aggregate(**aggs)

def relocate(
    self,
    *columns: str | s.Selector,
    before: str | s.Selector | None = None,
    after: str | s.Selector | None = None,
    **kwargs: str,
) -> Table:
    """Relocate `columns` before or after other specified columns.

    With neither `before` nor `after`, the relocated columns are moved to
    the front of the table. Columns that are not relocated keep their
    relative order.

    Parameters
    ----------
    columns
        Columns to relocate. Selectors are accepted.
    before
        A column name or selector. Columns in `columns` are relocated before
        the first column selected in `before`. If `before` selects nothing,
        the columns are moved to the front.
    after
        A column name or selector. Columns in `columns` are relocated after
        the last column selected in `after`. If `after` selects nothing, the
        columns are moved to the end.
    kwargs
        Additional column names to relocate, renaming argument values to
        keyword argument names.

    Returns
    -------
    Table
        A table with the columns relocated.

    Raises
    ------
    IbisInputError
        If no arguments are provided at all, or if both `before` and `after`
        are provided.

    Examples
    --------
    >>> import ibis
    >>> ibis.options.interactive = True
    >>> import ibis.selectors as s
    >>> t = ibis.memtable(dict(a=[1], b=[1], c=[1], d=["a"], e=["a"], f=["a"]))
    >>> t
    ┏━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━━┓
    ┃ a     ┃ b     ┃ c     ┃ d      ┃ e      ┃ f      ┃
    ┡━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━━┩
    │ int64 │ int64 │ int64 │ string │ string │ string │
    ├───────┼───────┼───────┼────────┼────────┼────────┤
    │     1 │     1 │     1 │ a      │ a      │ a      │
    └───────┴───────┴───────┴────────┴────────┴────────┘
    >>> t.relocate("f")
    ┏━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━━┓
    ┃ f      ┃ a     ┃ b     ┃ c     ┃ d      ┃ e      ┃
    ┡━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━┩
    │ string │ int64 │ int64 │ int64 │ string │ string │
    ├────────┼───────┼───────┼───────┼────────┼────────┤
    │ a      │     1 │     1 │     1 │ a      │ a      │
    └────────┴───────┴───────┴───────┴────────┴────────┘
    >>> t.relocate("a", after="c")
    ┏━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━━┓
    ┃ b     ┃ c     ┃ a     ┃ d      ┃ e      ┃ f      ┃
    ┡━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━━┩
    │ int64 │ int64 │ int64 │ string │ string │ string │
    ├───────┼───────┼───────┼────────┼────────┼────────┤
    │     1 │     1 │     1 │ a      │ a      │ a      │
    └───────┴───────┴───────┴────────┴────────┴────────┘
    >>> t.relocate("f", before="b")
    ┏━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━━┓
    ┃ a     ┃ f      ┃ b     ┃ c     ┃ d      ┃ e      ┃
    ┡━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━┩
    │ int64 │ string │ int64 │ int64 │ string │ string │
    ├───────┼────────┼───────┼───────┼────────┼────────┤
    │     1 │ a      │     1 │     1 │ a      │ a      │
    └───────┴────────┴───────┴───────┴────────┴────────┘
    >>> t.relocate("a", after=s.last())
    ┏━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━┓
    ┃ b     ┃ c     ┃ d      ┃ e      ┃ f      ┃ a     ┃
    ┡━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━┩
    │ int64 │ int64 │ string │ string │ string │ int64 │
    ├───────┼───────┼────────┼────────┼────────┼───────┤
    │     1 │     1 │ a      │ a      │ a      │     1 │
    └───────┴───────┴────────┴────────┴────────┴───────┘

    Relocate allows renaming

    >>> t.relocate(ff="f")
    ┏━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━━┓
    ┃ ff     ┃ a     ┃ b     ┃ c     ┃ d      ┃ e      ┃
    ┡━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━┩
    │ string │ int64 │ int64 │ int64 │ string │ string │
    ├────────┼───────┼───────┼───────┼────────┼────────┤
    │ a      │     1 │     1 │     1 │ a      │ a      │
    └────────┴───────┴───────┴───────┴────────┴────────┘

    You can relocate based on any predicate selector, such as
    [`of_type`][ibis.selectors.of_type]

    >>> t.relocate(s.of_type("string"))
    ┏━━━━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━┓
    ┃ d      ┃ e      ┃ f      ┃ a     ┃ b     ┃ c     ┃
    ┡━━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━┩
    │ string │ string │ string │ int64 │ int64 │ int64 │
    ├────────┼────────┼────────┼───────┼───────┼───────┤
    │ a      │ a      │ a      │     1 │     1 │     1 │
    └────────┴────────┴────────┴───────┴───────┴───────┘
    >>> t.relocate(s.numeric(), after=s.last())
    ┏━━━━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━┓
    ┃ d      ┃ e      ┃ f      ┃ a     ┃ b     ┃ c     ┃
    ┡━━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━┩
    │ string │ string │ string │ int64 │ int64 │ int64 │
    ├────────┼────────┼────────┼───────┼───────┼───────┤
    │ a      │ a      │ a      │     1 │     1 │     1 │
    └────────┴────────┴────────┴───────┴───────┴───────┘
    >>> t.relocate(s.any_of(s.c(*"ae")))
    ┏━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━━┓
    ┃ a     ┃ e      ┃ b     ┃ c     ┃ d      ┃ f      ┃
    ┡━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━┩
    │ int64 │ string │ int64 │ int64 │ string │ string │
    ├───────┼────────┼───────┼───────┼────────┼────────┤
    │     1 │ a      │     1 │     1 │ a      │ a      │
    └───────┴────────┴───────┴───────┴────────┴────────┘

    When multiple columns are selected with `before` or `after`, the
    relocated columns are placed before the first, or after the last, of
    those selected columns

    >>> t = ibis.memtable(dict(a=[1], b=["a"], c=[1], d=["a"]))
    >>> t.relocate(s.numeric(), after=s.of_type("string"))
    ┏━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┓
    ┃ b      ┃ d      ┃ a     ┃ c     ┃
    ┡━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━┩
    │ string │ string │ int64 │ int64 │
    ├────────┼────────┼───────┼───────┤
    │ a      │ a      │     1 │     1 │
    └────────┴────────┴───────┴───────┘
    >>> t.relocate(s.numeric(), before=s.of_type("string"))
    ┏━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━━┓
    ┃ a     ┃ c     ┃ b      ┃ d      ┃
    ┡━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━┩
    │ int64 │ int64 │ string │ string │
    ├───────┼───────┼────────┼────────┤
    │     1 │     1 │ a      │ a      │
    └───────┴───────┴────────┴────────┘
    """
    import ibis.selectors as s

    if not columns and before is None and after is None and not kwargs:
        raise com.IbisInputError(
            "At least one selector or `before` or `after` must be provided"
        )

    if before is not None and after is not None:
        raise com.IbisInputError("Cannot specify both `before` and `after`")

    table_columns = self.columns
    ncols = len(table_columns)

    # Maps column position -> output name. Insertion order is the output
    # order of the relocated columns.
    sels = {}

    # Positional selectors keep the existing column name (marked by `None`);
    # keyword selectors rename the matched column to the keyword.
    unrenamed = ((None, column) for column in columns)
    for new_name, target in itertools.chain(unrenamed, kwargs.items()):
        for pos in s._to_selector(target).positions(self):
            # make sure the last duplicate column wins by removing any
            # earlier entry, so that reinsertion lands at the end
            sels.pop(pos, None)
            sels[pos] = table_columns[pos] if new_name is None else new_name

    # `where` is the index at which the relocated columns are spliced in.
    if before is not None:
        # an empty `before` selection falls back to the front of the table
        where = min(s._to_selector(before).positions(self), default=0)
    elif after is not None:
        # an empty `after` selection falls back to the end of the table
        where = max(s._to_selector(after).positions(self), default=ncols - 1) + 1
    else:
        where = 0

    # all columns that should come BEFORE the matched selectors
    front = [left for left in range(where) if left not in sels]

    # all columns that should come AFTER the matched selectors
    back = [right for right in range(where, ncols) if right not in sels]

    # selected columns
    middle = [self[i].name(name) for i, name in sels.items()]

    relocated = self.select(*front, *middle, *back)

    # internal sanity check: relocation must neither drop nor add columns
    assert len(relocated.columns) == ncols

    return relocated


@public
class CachedTable(Table):
Expand Down
26 changes: 24 additions & 2 deletions ibis/selectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ class Selector(Concrete):

@abc.abstractmethod
def expand(self, table: ir.Table) -> Sequence[ir.Value]:
"""Expand `table` into a sequence of value expressions.
"""Expand `table` into value expressions that match the selector.
Parameters
----------
Expand All @@ -83,9 +83,26 @@ def expand(self, table: ir.Table) -> Sequence[ir.Value]:
Returns
-------
Sequence[Value]
A sequence of value expressions
A sequence of value expressions that match the selector
"""

def positions(self, table: ir.Table) -> Sequence[int]:
    """Expand `table` into column indices that match the selector.

    This implementation always raises; `Predicate` provides a working
    override.

    Parameters
    ----------
    table
        An ibis table expression

    Returns
    -------
    Sequence[int]
        A sequence of column indices where the selector matches

    Raises
    ------
    NotImplementedError
        Always, naming the concrete selector class in the message.
    """
    selector_name = self.__class__.__name__
    message = f"`positions` doesn't make sense for {selector_name} selector"
    raise NotImplementedError(message)


class Predicate(Selector):
predicate: Callable[[ir.Value], bool]
Expand All @@ -100,6 +117,11 @@ def expand(self, table: ir.Table) -> Sequence[ir.Value]:
"""
return [col for column in table.columns if self.predicate(col := table[column])]

def positions(self, table: ir.Table) -> Sequence[int]:
    """Return the indices of the columns of `table` that satisfy the predicate.

    Each column is looked up by name on `table` and passed to
    `self.predicate`; the index is kept when the predicate returns a truthy
    value. Indices are returned in table order.
    """
    predicate = self.predicate
    column_names = table.columns
    return [
        index
        for index, column_name in enumerate(column_names)
        if predicate(table[column_name])
    ]

def __and__(self, other: Selector) -> Predicate:
"""Compute the conjunction of two `Selector`s.
Expand Down
88 changes: 88 additions & 0 deletions ibis/tests/expr/test_relocate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
from __future__ import annotations

import pytest

import ibis
import ibis.common.exceptions as exc
import ibis.selectors as s


def test_individual_columns():
    """A single column can be moved with either `after` or `before`."""
    table = ibis.table(dict(x="int", y="int"))
    expected = list("yx")

    moved_after = table.relocate("x", after="y")
    assert moved_after.columns == expected

    moved_before = table.relocate("y", before="x")
    assert moved_before.columns == expected


def test_move_blocks():
    """A selector matching several columns moves them together as a block."""
    table = ibis.table(dict(x="int", a="string", y="int", b="string"))
    strings = s.of_type("string")

    to_front = table.relocate(strings)
    assert to_front.columns == list("abxy")

    after_numeric = table.relocate(strings, after=s.numeric())
    assert after_numeric.columns == list("xyab")


def test_keep_non_contiguous_variables():
    """Non-contiguous anchor columns stay in place; only the target moves."""
    table = ibis.table(dict.fromkeys("abcde", "int"))

    # `after` anchors on the last matched column ("e")
    after_last = table.relocate("b", after=s.c("a", "c", "e"))
    assert after_last.columns == list("acdeb")

    # `before` anchors on the first matched column ("b")
    before_first = table.relocate("e", before=s.c("b", "d"))
    assert before_first.columns == list("aebcd")


def test_before_after_does_not_move_to_front():
    """Relocating `y` with neither `before` nor `after` puts it first.

    NOTE(review): the test name suggests the opposite of what is asserted
    here -- confirm the intended name.
    """
    table = ibis.table(dict(x="int", y="int"))
    result = table.relocate("y")
    assert result.columns == list("yx")


def test_only_one_of_before_and_after():
    """Passing both `before` and `after` is rejected with `IbisInputError`."""
    table = ibis.table(dict(x="int", y="int", z="int"))
    expect_conflict = pytest.raises(exc.IbisInputError, match="Cannot specify both")

    with expect_conflict:
        table.relocate("z", before="x", after="y")


def test_respects_order():
    """Relocated columns appear in the order in which they were passed."""
    table = ibis.table(dict.fromkeys("axbzy", "int"))
    targets = ("x", "y", "z")

    before_x = table.relocate(*targets, before="x")
    assert before_x.columns == list("axyzb")

    before_last = table.relocate(*targets, before=s.last())
    assert before_last.columns == list("abxyz")

    to_front = table.relocate("x", "a", "z")
    assert to_front.columns == list("xazby")


def test_relocate_can_rename():
    """Keyword arguments rename the relocated column to the keyword name."""
    # The dtype for `f` is a plain string literal like its siblings; the raw
    # prefix it previously carried had no effect.
    t = ibis.table(dict(a="int", b="int", c="int", d="string", e="string", f="string"))
    assert t.relocate(ffff="f").columns == ["ffff", *"abcde"]
    assert t.relocate(ffff="f", before="c").columns == [*"ab", "ffff", *"cde"]
    assert t.relocate(ffff="f", after="c").columns == [*"abc", "ffff", *"de"]


def test_retains_last_duplicate_when_renaming_and_moving():
    """When one column is renamed more than once, the last rename wins."""
    single = ibis.table(dict(x="int"))
    renamed_twice = single.relocate(a="x", b="x")
    assert renamed_twice.columns == ["b"]

    # TODO: test against .rename once that's implemented

    pair = ibis.table(dict(x="int", y="int"))
    # `x` is first named "a", then re-selected as "c", which also moves it
    # behind `y` (named "b")
    reordered = pair.relocate(a="x", b="y", c="x")
    assert reordered.columns == list("bc")


def test_everything():
    """With `s.all()` as anchor, `before` means the front and `after` the end."""
    table = ibis.table(dict(w="int", x="int", y="int", z="int"))
    everything = s.all()

    at_front = table.relocate("y", "z", before=everything)
    assert at_front.columns == list("yzwx")

    at_end = table.relocate("y", "z", after=everything)
    assert at_end.columns == list("wxyz")


def test_moves_to_front_with_no_before_and_no_after():
    """Without `before`/`after`, the columns move to the front in the given order."""
    table = ibis.table(dict(x="int", y="int", z="int"))
    result = table.relocate("z", "y")
    assert result.columns == list("zyx")


def test_empty_before_moves_to_front():
    """A `before` selector that matches nothing relocates to the front."""
    table = ibis.table(dict(x="int", y="int", z="int"))
    # no string columns exist, so the `before` selection is empty
    matches_nothing = s.of_type("string")
    result = table.relocate("y", before=matches_nothing)
    assert result.columns == list("yxz")


def test_empty_after_moves_to_end():
    """An `after` selector that matches nothing relocates to the end."""
    table = ibis.table(dict(x="int", y="int", z="int"))
    # no string columns exist, so the `after` selection is empty
    matches_nothing = s.of_type("string")
    result = table.relocate("y", after=matches_nothing)
    assert result.columns == list("xzy")


def test_no_arguments():
    """Calling `relocate` with no arguments at all raises `IbisInputError`."""
    t = ibis.table(dict(x="int", y="int", z="int"))
    with pytest.raises(exc.IbisInputError, match="At least one selector"):
        # The call itself must raise, so there is no result to assert on.
        t.relocate()

0 comments on commit ee8a86f

Please sign in to comment.