Skip to content

Commit

Permalink
perf(api): return tuple from Table.columns instead of list
Browse files Browse the repository at this point in the history
  • Loading branch information
cpcloud committed Sep 14, 2024
1 parent 7594d31 commit 7d89a39
Show file tree
Hide file tree
Showing 5 changed files with 55 additions and 59 deletions.
13 changes: 7 additions & 6 deletions ibis/expr/types/relations.py
Original file line number Diff line number Diff line change
Expand Up @@ -674,8 +674,9 @@ def __getitem__(self, what: str | int | slice | Sequence[str | int]):
limit, offset = util.slice_to_limit_offset(what, self.count())
return self.limit(limit, offset=offset)

columns = self.columns
args = [
self.columns[arg] if isinstance(arg, int) else arg
columns[arg] if isinstance(arg, int) else arg
for arg in util.promote_list(what)
]
if util.all_of(args, str):
Expand Down Expand Up @@ -765,25 +766,25 @@ def _ipython_key_completions_(self) -> list[str]:
return self.columns

@property
def columns(self) -> list[str]:
"""The list of column names in this table.
def columns(self) -> tuple[str, ...]:
"""Return a [](`tuple`) of column names in this table.
Examples
--------
>>> import ibis
>>> ibis.options.interactive = True
>>> t = ibis.examples.penguins.fetch()
>>> t.columns
['species',
('species',
'island',
'bill_length_mm',
'bill_depth_mm',
'flipper_length_mm',
'body_mass_g',
'sex',
'year']
'year')
"""
return list(self.schema().names)
return self._arg.schema.names

def schema(self) -> sch.Schema:
"""Return the [Schema](./schemas.qmd#ibis.expr.schema.Schema) for this table.
Expand Down
6 changes: 3 additions & 3 deletions ibis/tests/expr/test_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,12 +186,12 @@ def test_filter_self_join():

def test_is_ancestor_analytic():
x = ibis.table(ibis.schema([("col", "int32")]), "x")
with_filter_col = x.select(x.columns + [ibis.null().name("filter")])
with_filter_col = x.select(*x.columns, ibis.null().name("filter"))
filtered = with_filter_col.filter(with_filter_col["filter"].isnull())
subquery = filtered.select(filtered.columns)
subquery = filtered.select(*filtered.columns)

with_analytic = subquery.select(
subquery.columns + [subquery.count().name("analytic")]
*subquery.columns, subquery.count().name("analytic")
)

assert not subquery.op().equals(with_analytic.op())
Expand Down
46 changes: 23 additions & 23 deletions ibis/tests/expr/test_relocate.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,31 +9,31 @@

def test_individual_columns():
t = ibis.table(dict(x="int", y="int"))
assert t.relocate("x", after="y").columns == list("yx")
assert t.relocate("y", before="x").columns == list("yx")
assert t.relocate("x", after="y").columns == tuple("yx")
assert t.relocate("y", before="x").columns == tuple("yx")


def test_move_blocks():
t = ibis.table(dict(x="int", a="string", y="int", b="string"))
assert t.relocate(s.of_type("string")).columns == list("abxy")
assert t.relocate(s.of_type("string"), after=s.numeric()).columns == list("xyab")
assert t.relocate(s.of_type("string")).columns == tuple("abxy")
assert t.relocate(s.of_type("string"), after=s.numeric()).columns == tuple("xyab")


def test_duplicates_not_renamed():
t = ibis.table(dict(x="int", y="int"))
assert t.relocate("y", s.numeric()).columns == list("yx")
assert t.relocate("y", s.numeric(), "y").columns == list("yx")
assert t.relocate("y", s.numeric()).columns == tuple("yx")
assert t.relocate("y", s.numeric(), "y").columns == tuple("yx")


def test_keep_non_contiguous_variables():
t = ibis.table(dict.fromkeys("abcde", "int"))
assert t.relocate("b", after=s.cols("a", "c", "e")).columns == list("acdeb")
assert t.relocate("e", before=s.cols("b", "d")).columns == list("aebcd")
assert t.relocate("b", after=s.cols("a", "c", "e")).columns == tuple("acdeb")
assert t.relocate("e", before=s.cols("b", "d")).columns == tuple("aebcd")


def test_before_after_does_not_move_to_front():
t = ibis.table(dict(x="int", y="int"))
assert t.relocate("y").columns == list("yx")
assert t.relocate("y").columns == tuple("yx")


def test_only_one_of_before_and_after():
Expand All @@ -45,47 +45,47 @@ def test_only_one_of_before_and_after():

def test_respects_order():
t = ibis.table(dict.fromkeys("axbzy", "int"))
assert t.relocate("x", "y", "z", before="x").columns == list("axyzb")
assert t.relocate("x", "y", "z", before=s.last()).columns == list("abxyz")
assert t.relocate("x", "a", "z").columns == list("xazby")
assert t.relocate("x", "y", "z", before="x").columns == tuple("axyzb")
assert t.relocate("x", "y", "z", before=s.last()).columns == tuple("abxyz")
assert t.relocate("x", "a", "z").columns == tuple("xazby")


def test_relocate_can_rename():
t = ibis.table(dict(a="int", b="int", c="int", d="string", e="string", f=r"string"))
assert t.relocate(ffff="f").columns == ["ffff", *"abcde"]
assert t.relocate(ffff="f", before="c").columns == [*"ab", "ffff", *"cde"]
assert t.relocate(ffff="f", after="c").columns == [*"abc", "ffff", *"de"]
assert t.relocate(ffff="f").columns == ("ffff", *"abcde")
assert t.relocate(ffff="f", before="c").columns == (*"ab", "ffff", *"cde")
assert t.relocate(ffff="f", after="c").columns == (*"abc", "ffff", *"de")


def test_retains_last_duplicate_when_renaming_and_moving():
t = ibis.table(dict(x="int"))
assert t.relocate(a="x", b="x").columns == ["b"]
assert t.relocate(a="x", b="x").columns == ("b",)

# TODO: test against .rename once that's implemented

t = ibis.table(dict(x="int", y="int"))
assert t.relocate(a="x", b="y", c="x").columns == list("bc")
assert t.relocate(a="x", b="y", c="x").columns == tuple("bc")


def test_everything():
t = ibis.table(dict(w="int", x="int", y="int", z="int"))
assert t.relocate("y", "z", before=s.all()).columns == list("yzwx")
assert t.relocate("y", "z", after=s.all()).columns == list("wxyz")
assert t.relocate("y", "z", before=s.all()).columns == tuple("yzwx")
assert t.relocate("y", "z", after=s.all()).columns == tuple("wxyz")


def test_moves_to_front_with_no_before_and_no_after():
t = ibis.table(dict(x="int", y="int", z="int"))
assert t.relocate("z", "y").columns == list("zyx")
assert t.relocate("z", "y").columns == tuple("zyx")


def test_empty_before_moves_to_front():
t = ibis.table(dict(x="int", y="int", z="int"))
assert t.relocate("y", before=s.of_type("string")).columns == list("yxz")
assert t.relocate("y", before=s.of_type("string")).columns == tuple("yxz")


def test_empty_after_moves_to_end():
t = ibis.table(dict(x="int", y="int", z="int"))
assert t.relocate("y", after=s.of_type("string")).columns == list("xzy")
assert t.relocate("y", after=s.of_type("string")).columns == tuple("xzy")


def test_no_arguments():
Expand All @@ -96,7 +96,7 @@ def test_no_arguments():

def test_tuple_input():
t = ibis.table(dict(x="int", y="int", z="int"))
assert t.relocate(("y", "z")).columns == list("yzx")
assert t.relocate(("y", "z")).columns == tuple("yzx")

# not allowed, because this would be technically inconsistent with `select`
# though, the tuple is unambiguous here and could never be interpreted as a
Expand Down
2 changes: 1 addition & 1 deletion ibis/tests/expr/test_selectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -537,7 +537,7 @@ def test_methods(penguins):

selector = s.across(s.all(), ibis.null(_.type()))
bound = selector.expand(penguins)
assert [col.get_name() for col in bound] == penguins.columns
assert [col.get_name() for col in bound] == list(penguins.columns)


@pytest.mark.parametrize("sel", [s.none(), s.cols(), []])
Expand Down
47 changes: 21 additions & 26 deletions ibis/tests/expr/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def test_empty_schema():
def test_columns(con):
t = con.table("alltypes")
result = t.columns
expected = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"]
expected = ("a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k")
assert result == expected


Expand Down Expand Up @@ -773,7 +773,7 @@ def test_filter_on_literal_string_is_column(table):
def test_filter_on_literal_then_aggregate(table):
# Mostly just a smoketest, this used to error on construction
expr = table.filter(ibis.literal(True)).agg(lambda t: t.a.sum().name("total"))
assert expr.columns == ["total"]
assert expr.columns == ("total",)


def test_group_by_having_api(table):
Expand Down Expand Up @@ -966,12 +966,7 @@ def test_asof_join():
right = ibis.table([("time", "int32"), ("value2", "double")])
joined = api.asof_join(left, right, "time")

assert joined.columns == [
"time",
"value",
"time_right",
"value2",
]
assert joined.columns == ("time", "value", "time_right", "value2")
pred = joined.op().rest[0].predicates[0]
assert pred.left.name == pred.right.name == "time"

Expand Down Expand Up @@ -1124,10 +1119,10 @@ def test_self_join_no_view_convenience(table):
# column names to join on rather than referentially-valid expressions

result = table.join(table, [("g", "g")])
expected_cols = list(table.columns)
expected_cols = table.columns
# TODO(kszucs): the inner join convenience of not duplicating the
# equivalent columns from the right table is not implemented yet
expected_cols.extend(f"{c}_right" for c in table.columns if c != "g")
expected_cols += tuple(f"{c}_right" for c in table.columns if c != "g")
assert result.columns == expected_cols


Expand Down Expand Up @@ -1228,34 +1223,34 @@ def test_inner_join_overlapping_column_names():
joined = t1.join(t2, "foo")
expected = t1.join(t2, t1.foo == t2.foo)
assert_equal(joined, expected)
assert joined.columns == ["foo", "bar", "value1", "bar_right", "value2"]
assert joined.columns == ("foo", "bar", "value1", "bar_right", "value2")

joined = t1.join(t2, ["foo", "bar"])
expected = t1.join(t2, [t1.foo == t2.foo, t1.bar == t2.bar])
assert_equal(joined, expected)
assert joined.columns == ["foo", "bar", "value1", "value2"]
assert joined.columns == ("foo", "bar", "value1", "value2")

# Equality predicates don't have the same name, so we need to rename
joined = t1.join(t2, t1.foo == t2.bar)
assert joined.columns == [
assert joined.columns == (
"foo",
"bar",
"value1",
"foo_right",
"bar_right",
"value2",
]
)

# Not all predicates are equality, still need to rename
joined = t1.join(t2, ["foo", t1.value1 < t2.value2])
assert joined.columns == [
assert joined.columns == (
"foo",
"bar",
"value1",
"foo_right",
"bar_right",
"value2",
]
)


@pytest.mark.parametrize(
Expand Down Expand Up @@ -1646,7 +1641,7 @@ def test_pickle_asof_join():
def test_group_by_key_function():
t = ibis.table([("a", "timestamp"), ("b", "string"), ("c", "double")])
expr = t.group_by(new_key=lambda t: t.b.length()).aggregate(foo=t.c.mean())
assert expr.columns == ["new_key", "foo"]
assert expr.columns == ("new_key", "foo")


def test_unbound_table_name():
Expand Down Expand Up @@ -1742,14 +1737,14 @@ def test_merge_as_of_allows_overlapping_columns():
signal_two = signal_two.rename(voltage="value", signal_two="field")

merged = signal_one.asof_join(signal_two, "timestamp_received")
assert merged.columns == [
assert merged.columns == (
"current",
"timestamp_received",
"signal_one",
"voltage",
"timestamp_received_right",
"signal_two",
]
)


def test_select_from_unambiguous_join_with_strings():
Expand All @@ -1758,7 +1753,7 @@ def test_select_from_unambiguous_join_with_strings():
s = ibis.table([("b", "int64"), ("c", "string")])
joined = t.left_join(s, [t.b == s.c])
expr = joined.select(t, "c")
assert expr.columns == ["a", "b", "c"]
assert expr.columns == ("a", "b", "c")


def test_filter_applied_to_join():
Expand All @@ -1770,7 +1765,7 @@ def test_filter_applied_to_join():
gdp,
predicates=[countries["iso_alpha3"] == gdp["country_code"]],
).filter(gdp["year"] == 2017)
assert expr.columns == ["iso_alpha3", "country_code", "year"]
assert expr.columns == ("iso_alpha3", "country_code", "year")


@pytest.mark.parametrize("how", ["inner", "left", "outer", "right"])
Expand All @@ -1780,16 +1775,16 @@ def test_join_lname_rname(how):
method = getattr(left, f"{how}_join")

expr = method(right)
assert expr.columns == ["id", "first_name", "id_right", "last_name"]
assert expr.columns == ("id", "first_name", "id_right", "last_name")

expr = method(right, rname="right_{name}")
assert expr.columns == ["id", "first_name", "right_id", "last_name"]
assert expr.columns == ("id", "first_name", "right_id", "last_name")

expr = method(right, lname="left_{name}", rname="")
assert expr.columns == ["left_id", "first_name", "id", "last_name"]
assert expr.columns == ("left_id", "first_name", "id", "last_name")

expr = method(right, rname="right_{name}", lname="left_{name}")
assert expr.columns == ["left_id", "first_name", "right_id", "last_name"]
assert expr.columns == ("left_id", "first_name", "right_id", "last_name")


def test_join_lname_rname_still_collide():
Expand Down Expand Up @@ -1850,7 +1845,7 @@ def test_memtable_filter():
# Mostly just a smoketest, this used to error on construction
t = ibis.memtable([(1, 2), (3, 4), (5, 6)], columns=["x", "y"])
expr = t.filter(t.x > 1)
assert expr.columns == ["x", "y"]
assert expr.columns == ("x", "y")


def test_default_backend_with_unbound_table():
Expand Down

0 comments on commit 7d89a39

Please sign in to comment.