From ee8a86f86e436cb33107e20bb8376b924a419bb3 Mon Sep 17 00:00:00 2001
From: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
Date: Wed, 9 Aug 2023 08:30:28 -0400
Subject: [PATCH] feat(api): add `relocate` table expression API for moving columns around based on selectors

---
 ibis/expr/types/relations.py     | 183 +++++++++++++++++++++++++++++++
 ibis/selectors.py                |  26 ++++-
 ibis/tests/expr/test_relocate.py |  88 +++++++++++++++
 3 files changed, 295 insertions(+), 2 deletions(-)
 create mode 100644 ibis/tests/expr/test_relocate.py

diff --git a/ibis/expr/types/relations.py b/ibis/expr/types/relations.py
index 861f98805c72..7b4f57e65050 100644
--- a/ibis/expr/types/relations.py
+++ b/ibis/expr/types/relations.py
@@ -3609,6 +3609,189 @@ def pivot_wider(
 
         return self.group_by(id_cols).aggregate(**aggs)
 
+    def relocate(
+        self,
+        *columns: str | s.Selector,
+        before: str | s.Selector | None = None,
+        after: str | s.Selector | None = None,
+        **kwargs: str,
+    ) -> Table:
+        """Relocate `columns` before or after other specified columns.
+
+        Parameters
+        ----------
+        columns
+            Columns to relocate. Selectors are accepted.
+        before
+            A column name or selector. `columns` go before its first matching column.
+        after
+            A column name or selector. Columns in `columns` are relocated after the last
+            column selected in `after`.
+        kwargs
+            Additional column names to relocate, renaming argument values to
+            keyword argument names.
+
+        Returns
+        -------
+        Table
+            A table with the columns relocated.
+
+        Examples
+        --------
+        >>> import ibis
+        >>> ibis.options.interactive = True
+        >>> import ibis.selectors as s
+        >>> t = ibis.memtable(dict(a=[1], b=[1], c=[1], d=["a"], e=["a"], f=["a"]))
+        >>> t
+        ┏━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━━┓
+        ┃ a     ┃ b     ┃ c     ┃ d      ┃ e      ┃ f      ┃
+        ┡━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━━┩
+        │ int64 │ int64 │ int64 │ string │ string │ string │
+        ├───────┼───────┼───────┼────────┼────────┼────────┤
+        │     1 │     1 │     1 │ a      │ a      │ a      │
+        └───────┴───────┴───────┴────────┴────────┴────────┘
+        >>> t.relocate("f")
+        ┏━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━━┓
+        ┃ f      ┃ a     ┃ b     ┃ c     ┃ d      ┃ e      ┃
+        ┡━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━┩
+        │ string │ int64 │ int64 │ int64 │ string │ string │
+        ├────────┼───────┼───────┼───────┼────────┼────────┤
+        │ a      │     1 │     1 │     1 │ a      │ a      │
+        └────────┴───────┴───────┴───────┴────────┴────────┘
+        >>> t.relocate("a", after="c")
+        ┏━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━━┓
+        ┃ b     ┃ c     ┃ a     ┃ d      ┃ e      ┃ f      ┃
+        ┡━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━━┩
+        │ int64 │ int64 │ int64 │ string │ string │ string │
+        ├───────┼───────┼───────┼────────┼────────┼────────┤
+        │     1 │     1 │     1 │ a      │ a      │ a      │
+        └───────┴───────┴───────┴────────┴────────┴────────┘
+        >>> t.relocate("f", before="b")
+        ┏━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━━┓
+        ┃ a     ┃ f      ┃ b     ┃ c     ┃ d      ┃ e      ┃
+        ┡━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━┩
+        │ int64 │ string │ int64 │ int64 │ string │ string │
+        ├───────┼────────┼───────┼───────┼────────┼────────┤
+        │     1 │ a      │     1 │     1 │ a      │ a      │
+        └───────┴────────┴───────┴───────┴────────┴────────┘
+        >>> t.relocate("a", after=s.last())
+        ┏━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━┓
+        ┃ b     ┃ c     ┃ d      ┃ e      ┃ f      ┃ a     ┃
+        ┡━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━┩
+        │ int64 │ int64 │ string │ string │ string │ int64 │
+        ├───────┼───────┼────────┼────────┼────────┼───────┤
+        │     1 │     1 │ a      │ a      │ a      │     1 │
+        └───────┴───────┴────────┴────────┴────────┴───────┘
+
+        Relocate allows renaming
+
+        >>> t.relocate(ff="f")
+        ┏━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━━┓
+        ┃ ff     ┃ a     ┃ b     ┃ c     ┃ d      ┃ e      ┃
+        ┡━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━┩
+        │ string │ int64 │ int64 │ int64 │ string │ string │
+        ├────────┼───────┼───────┼───────┼────────┼────────┤
+        │ a      │     1 │     1 │     1 │ a      │ a      │
+        └────────┴───────┴───────┴───────┴────────┴────────┘
+
+        You can relocate based on any predicate selector, such as
+        [`of_type`][ibis.selectors.of_type]
+
+        >>> t.relocate(s.of_type("string"))
+        ┏━━━━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━┓
+        ┃ d      ┃ e      ┃ f      ┃ a     ┃ b     ┃ c     ┃
+        ┡━━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━┩
+        │ string │ string │ string │ int64 │ int64 │ int64 │
+        ├────────┼────────┼────────┼───────┼───────┼───────┤
+        │ a      │ a      │ a      │     1 │     1 │     1 │
+        └────────┴────────┴────────┴───────┴───────┴───────┘
+        >>> t.relocate(s.numeric(), after=s.last())
+        ┏━━━━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━┓
+        ┃ d      ┃ e      ┃ f      ┃ a     ┃ b     ┃ c     ┃
+        ┡━━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━┩
+        │ string │ string │ string │ int64 │ int64 │ int64 │
+        ├────────┼────────┼────────┼───────┼───────┼───────┤
+        │ a      │ a      │ a      │     1 │     1 │     1 │
+        └────────┴────────┴────────┴───────┴───────┴───────┘
+        >>> t.relocate(s.any_of(s.c(*"ae")))
+        ┏━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━━┓
+        ┃ a     ┃ e      ┃ b     ┃ c     ┃ d      ┃ f      ┃
+        ┡━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━┩
+        │ int64 │ string │ int64 │ int64 │ string │ string │
+        ├───────┼────────┼───────┼───────┼────────┼────────┤
+        │     1 │ a      │     1 │     1 │ a      │ a      │
+        └───────┴────────┴───────┴───────┴────────┴────────┘
+
+        When `before` or `after` selects multiple columns, `columns` are placed
+        before the first match of `before` or after the last match of `after`
+
+        >>> t = ibis.memtable(dict(a=[1], b=["a"], c=[1], d=["a"]))
+        >>> t.relocate(s.numeric(), after=s.of_type("string"))
+        ┏━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┓
+        ┃ b      ┃ d      ┃ a     ┃ c     ┃
+        ┡━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━┩
+        │ string │ string │ int64 │ int64 │
+        ├────────┼────────┼───────┼───────┤
+        │ a      │ a      │     1 │     1 │
+        └────────┴────────┴───────┴───────┘
+        >>> t.relocate(s.numeric(), before=s.of_type("string"))
+        ┏━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━━┓
+        ┃ a     ┃ c     ┃ b      ┃ d      ┃
+        ┡━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━┩
+        │ int64 │ int64 │ string │ string │
+        ├───────┼───────┼────────┼────────┤
+        │     1 │     1 │ a      │ a      │
+        └───────┴───────┴────────┴────────┘
+        """
+        import ibis.selectors as s
+
+        if not columns and before is None and after is None and not kwargs:
+            raise com.IbisInputError(
+                "At least one selector or `before` or `after` must be provided"
+            )
+
+        if before is not None and after is not None:
+            raise com.IbisInputError("Cannot specify both `before` and `after`")
+
+        sels = {}
+        table_columns = self.columns
+
+        for name, sel in itertools.chain(
+            zip(itertools.repeat(None), map(s._to_selector, columns)),
+            zip(kwargs.keys(), map(s._to_selector, kwargs.values())),
+        ):
+            for pos in sel.positions(self):
+                if pos in sels:
+                    # make sure the last duplicate column wins by reinserting
+                    # the position if it already exists
+                    del sels[pos]
+                sels[pos] = name if name is not None else table_columns[pos]
+
+        ncols = len(table_columns)
+
+        if before is not None:
+            where = min(s._to_selector(before).positions(self), default=0)
+        elif after is not None:
+            where = max(s._to_selector(after).positions(self), default=ncols - 1) + 1
+        else:
+            assert before is None and after is None
+            where = 0
+
+        # all columns that should come BEFORE the matched selectors
+        front = [left for left in range(where) if left not in sels]
+
+        # all columns that should come AFTER the matched selectors
+        back = [right for right in range(where, ncols) if right not in sels]
+
+        # selected columns, renamed when they were passed as keyword arguments
+        middle = [self[i].name(name) for i, name in sels.items()]
+
+        relocated = self.select(*front, *middle, *back)
+
+        assert len(relocated.columns) == ncols
+
+        return relocated
+
 
 @public
 class CachedTable(Table):
diff --git a/ibis/selectors.py b/ibis/selectors.py
index 9d6410165685..95c494956caa 100644
--- a/ibis/selectors.py
+++ b/ibis/selectors.py
@@ -73,7 +73,7 @@ class Selector(Concrete):
 
     @abc.abstractmethod
     def expand(self, table: ir.Table) -> Sequence[ir.Value]:
-        """Expand `table` into a sequence of value expressions.
+        """Expand `table` into value expressions that match the selector.
 
         Parameters
         ----------
@@ -83,9 +83,26 @@ def expand(self, table: ir.Table) -> Sequence[ir.Value]:
         Returns
         -------
         Sequence[Value]
-            A sequence of value expressions
+            A sequence of value expressions that match the selector
         """
 
+    def positions(self, table: ir.Table) -> Sequence[int]:
+        """Expand `table` into column indices that match the selector.
+
+        Parameters
+        ----------
+        table
+            An ibis table expression
+
+        Returns
+        -------
+        Sequence[int]
+            A sequence of column indices where the selector matches
+        """
+        raise NotImplementedError(
+            f"`positions` doesn't make sense for {self.__class__.__name__} selector"
+        )
+
 
 class Predicate(Selector):
     predicate: Callable[[ir.Value], bool]
@@ -100,6 +117,11 @@ def expand(self, table: ir.Table) -> Sequence[ir.Value]:
         """
         return [col for column in table.columns if self.predicate(col := table[column])]
 
+    def positions(self, table: ir.Table) -> Sequence[int]:
+        return [
+            i for i, column in enumerate(table.columns) if self.predicate(table[column])
+        ]
+
     def __and__(self, other: Selector) -> Predicate:
         """Compute the conjunction of two `Selector`s.
 
diff --git a/ibis/tests/expr/test_relocate.py b/ibis/tests/expr/test_relocate.py
new file mode 100644
index 000000000000..6e34967a9aa2
--- /dev/null
+++ b/ibis/tests/expr/test_relocate.py
@@ -0,0 +1,88 @@
+from __future__ import annotations
+
+import pytest
+
+import ibis
+import ibis.common.exceptions as exc
+import ibis.selectors as s
+
+
+def test_individual_columns():
+    t = ibis.table(dict(x="int", y="int"))
+    assert t.relocate("x", after="y").columns == list("yx")
+    assert t.relocate("y", before="x").columns == list("yx")
+
+
+def test_move_blocks():
+    t = ibis.table(dict(x="int", a="string", y="int", b="string"))
+    assert t.relocate(s.of_type("string")).columns == list("abxy")
+    assert t.relocate(s.of_type("string"), after=s.numeric()).columns == list("xyab")
+
+
+def test_keep_non_contiguous_variables():
+    t = ibis.table(dict.fromkeys("abcde", "int"))
+    assert t.relocate("b", after=s.c("a", "c", "e")).columns == list("acdeb")
+    assert t.relocate("e", before=s.c("b", "d")).columns == list("aebcd")
+
+
+def test_before_after_does_not_move_to_front():
+    t = ibis.table(dict(x="int", y="int"))
+    assert t.relocate("y").columns == list("yx")  # no anchor given: "y" goes first
+
+
+def test_only_one_of_before_and_after():
+    t = ibis.table(dict(x="int", y="int", z="int"))
+
+    with pytest.raises(exc.IbisInputError, match="Cannot specify both"):
+        t.relocate("z", before="x", after="y")
+
+
+def test_respects_order():
+    t = ibis.table(dict.fromkeys("axbzy", "int"))
+    assert t.relocate("x", "y", "z", before="x").columns == list("axyzb")
+    assert t.relocate("x", "y", "z", before=s.last()).columns == list("abxyz")
+    assert t.relocate("x", "a", "z").columns == list("xazby")
+
+
+def test_relocate_can_rename():
+    t = ibis.table(dict(a="int", b="int", c="int", d="string", e="string", f="string"))
+    assert t.relocate(ffff="f").columns == ["ffff", *"abcde"]
+    assert t.relocate(ffff="f", before="c").columns == [*"ab", "ffff", *"cde"]
+    assert t.relocate(ffff="f", after="c").columns == [*"abc", "ffff", *"de"]
+
+
+def test_retains_last_duplicate_when_renaming_and_moving():
+    t = ibis.table(dict(x="int"))
+    assert t.relocate(a="x", b="x").columns == ["b"]  # the last rename of "x" wins
+
+    # TODO: test against .rename once that's implemented
+
+    t = ibis.table(dict(x="int", y="int"))
+    assert t.relocate(a="x", b="y", c="x").columns == list("bc")  # "x" re-queued as c
+
+
+def test_everything():
+    t = ibis.table(dict(w="int", x="int", y="int", z="int"))
+    assert t.relocate("y", "z", before=s.all()).columns == list("yzwx")
+    assert t.relocate("y", "z", after=s.all()).columns == list("wxyz")
+
+
+def test_moves_to_front_with_no_before_and_no_after():
+    t = ibis.table(dict(x="int", y="int", z="int"))
+    assert t.relocate("z", "y").columns == list("zyx")
+
+
+def test_empty_before_moves_to_front():
+    t = ibis.table(dict(x="int", y="int", z="int"))
+    assert t.relocate("y", before=s.of_type("string")).columns == list("yxz")
+
+
+def test_empty_after_moves_to_end():
+    t = ibis.table(dict(x="int", y="int", z="int"))
+    assert t.relocate("y", after=s.of_type("string")).columns == list("xzy")
+
+
+def test_no_arguments():
+    t = ibis.table(dict(x="int", y="int", z="int"))
+    with pytest.raises(exc.IbisInputError, match="At least one selector"):
+        t.relocate()  # the call itself must raise; nothing to assert on