From ff3550cf1ceb1a178a1cb24add0362b1a29519df Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Mon, 26 Aug 2024 09:20:05 -0400 Subject: [PATCH] test(backends): fix backend tests that assume a list --- ibis/backends/__init__.py | 2 +- .../bigquery/tests/system/test_client.py | 2 +- ibis/backends/impala/tests/test_ddl.py | 2 +- ibis/backends/impala/tests/test_exprs.py | 2 +- ibis/backends/snowflake/tests/test_client.py | 2 +- ibis/backends/tests/test_aggregation.py | 2 +- ibis/backends/tests/test_client.py | 2 +- ibis/backends/tests/test_examples.py | 2 +- ibis/backends/tests/test_generic.py | 8 ++--- ibis/backends/tests/test_join.py | 27 ++++++----------- ibis/backends/tests/test_set_ops.py | 2 +- ibis/expr/types/dataframe_interchange.py | 2 +- ibis/expr/types/relations.py | 6 ++-- ibis/selectors.py | 30 +++++++++---------- ibis/tests/expr/test_analytics.py | 4 +-- ibis/tests/expr/test_table.py | 6 ++-- 16 files changed, 46 insertions(+), 55 deletions(-) diff --git a/ibis/backends/__init__.py b/ibis/backends/__init__.py index ecec01a0bb15..8b73d812a34b 100644 --- a/ibis/backends/__init__.py +++ b/ibis/backends/__init__.py @@ -221,7 +221,7 @@ def to_pyarrow( table = pa.Table.from_batches(reader, schema=arrow_schema) return expr.__pyarrow_result__( - table.rename_columns(table_expr.columns).cast(arrow_schema) + table.rename_columns(list(table_expr.columns)).cast(arrow_schema) ) @util.experimental diff --git a/ibis/backends/bigquery/tests/system/test_client.py b/ibis/backends/bigquery/tests/system/test_client.py index 0ae05872a52e..ad837065a949 100644 --- a/ibis/backends/bigquery/tests/system/test_client.py +++ b/ibis/backends/bigquery/tests/system/test_client.py @@ -196,7 +196,7 @@ def test_parted_column(con, kind): table_name = f"{kind}_column_parted" t = con.table(table_name) expected_column = f"my_{kind}_parted_col" - assert t.columns == [expected_column, "string_col", "int_col"] + assert t.columns == (expected_column, "string_col", "int_col") def test_cross_project_query(public): diff --git a/ibis/backends/impala/tests/test_ddl.py b/ibis/backends/impala/tests/test_ddl.py index fd12a69a0be9..ad74932176b5 100644 --- a/ibis/backends/impala/tests/test_ddl.py +++ b/ibis/backends/impala/tests/test_ddl.py @@ -334,5 +334,5 @@ def test_varchar_char_support(temp_char_table): def test_access_kudu_table(kudu_table): - assert kudu_table.columns == ["a"] + assert kudu_table.columns == ("a",) assert kudu_table["a"].type() == dt.string diff --git a/ibis/backends/impala/tests/test_exprs.py b/ibis/backends/impala/tests/test_exprs.py index b65cde32ef3f..84949ba314be 100644 --- a/ibis/backends/impala/tests/test_exprs.py +++ b/ibis/backends/impala/tests/test_exprs.py @@ -667,7 +667,7 @@ def test_where_with_timestamp(snapshot): def test_filter_with_analytic(snapshot): x = ibis.table(ibis.schema([("col", "int32")]), "x") - with_filter_col = x.select(x.columns + [ibis.null().name("filter")]) + with_filter_col = x.select(*x.columns, ibis.null().name("filter")) filtered = with_filter_col.filter(with_filter_col["filter"].isnull()) subquery = filtered.select(filtered.columns) diff --git a/ibis/backends/snowflake/tests/test_client.py b/ibis/backends/snowflake/tests/test_client.py index 1b2b956e30ed..5e64c45a05bd 100644 --- a/ibis/backends/snowflake/tests/test_client.py +++ b/ibis/backends/snowflake/tests/test_client.py @@ -303,7 +303,7 @@ def test_insert(con): } ], ) - assert t.columns == ["ID", "NAME", "SCORE_1", "SCORE_2", "SCORE_3", "AGE"] + assert t.columns == ("ID", "NAME", "SCORE_1", "SCORE_2", "SCORE_3", "AGE") assert t.count().execute() == 1 diff --git a/ibis/backends/tests/test_aggregation.py b/ibis/backends/tests/test_aggregation.py index 6e7a29224105..08677f3c0781 100644 --- a/ibis/backends/tests/test_aggregation.py +++ b/ibis/backends/tests/test_aggregation.py @@ -1615,7 +1615,7 @@ def test_group_concat_over_window(backend, con): def test_value_counts_on_expr(backend, alltypes, df): expr = alltypes.bigint_col.add(1).value_counts() - columns = expr.columns + columns = list(expr.columns) expr = expr.order_by(columns) result = expr.execute().sort_values(columns).reset_index(drop=True) expected = df.bigint_col.add(1).value_counts().reset_index() diff --git a/ibis/backends/tests/test_client.py b/ibis/backends/tests/test_client.py index 0a0767bde68c..9ed5078b78cb 100644 --- a/ibis/backends/tests/test_client.py +++ b/ibis/backends/tests/test_client.py @@ -1687,7 +1687,7 @@ def test_cross_database_join(con_create_database, monkeypatch): ) expr = left.join(right, "a") - assert expr.columns == ["a", "b", "c"] + assert expr.columns == ("a", "b", "c") result = expr.to_pyarrow() expected = pa.Table.from_pydict({"a": [1], "b": [2], "c": [3]}) diff --git a/ibis/backends/tests/test_examples.py b/ibis/backends/tests/test_examples.py index e5cbf97df57e..0c8ab5e3a8fb 100644 --- a/ibis/backends/tests/test_examples.py +++ b/ibis/backends/tests/test_examples.py @@ -61,5 +61,5 @@ ) def test_load_examples(con, example, columns): t = getattr(ibis.examples, example).fetch(backend=con) - assert t.columns == columns + assert t.columns == tuple(columns) assert t.count().execute() > 0 diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py index d4b3c3819ebe..a42c84ba0d17 100644 --- a/ibis/backends/tests/test_generic.py +++ b/ibis/backends/tests/test_generic.py @@ -722,8 +722,8 @@ def test_order_by_two_cols_nulls(con, op1, nf1, nf2, op2, expected): def test_table_info(alltypes): expr = alltypes.info() df = expr.execute() - assert alltypes.columns == list(df.name) - assert expr.columns == [ + assert alltypes.columns == tuple(df.name) + assert expr.columns == ( "name", "type", "nullable", @@ -731,8 +731,8 @@ def test_table_info(alltypes): "non_nulls", "null_frac", "pos", - ] - assert expr.columns == list(df.columns) + ) + assert expr.columns == tuple(df.columns) @pytest.mark.notyet( diff --git a/ibis/backends/tests/test_join.py b/ibis/backends/tests/test_join.py index c36001f2b37b..bb7b1c9915d4 100644 --- a/ibis/backends/tests/test_join.py +++ b/ibis/backends/tests/test_join.py @@ -73,10 +73,11 @@ def test_mutating_join(backend, batting, awards_players, how): result_order = ["playerID", "yearID", "lgID", "stint"] expr = left.join(right, predicate, how=how) + cols = list(left.columns) if how == "inner": result = ( expr.execute() - .fillna(np.nan)[left.columns] + .fillna(np.nan)[cols] .sort_values(result_order) .reset_index(drop=True) ) @@ -86,23 +87,16 @@ def test_mutating_join(backend, batting, awards_players, how): .fillna(np.nan) .assign( playerID=lambda df: df.playerID.where( - df.playerID.notnull(), - df.playerID_right, + df.playerID.notnull(), df.playerID_right ) ) - .drop(["playerID_right"], axis=1)[left.columns] + .drop(["playerID_right"], axis=1)[cols] .sort_values(result_order) .reset_index(drop=True) ) expected = ( - check_eq( - left_df, - right_df, - how=how, - on=predicate, - suffixes=("_x", "_y"), - )[left.columns] + check_eq(left_df, right_df, how=how, on=predicate, suffixes=("_x", "_y"))[cols] .sort_values(result_order) .reset_index(drop=True) ) @@ -123,20 +117,17 @@ def test_filtering_join(backend, batting, awards_players, how): result_order = ["playerID", "yearID", "lgID", "stint"] expr = left.join(right, predicate, how=how) + cols = list(left.columns) result = ( expr.execute() .fillna(np.nan) - .sort_values(result_order)[left.columns] + .sort_values(result_order)[cols] .reset_index(drop=True) ) expected = check_eq( - left_df, - right_df, - how=how, - on=predicate, - suffixes=("", "_y"), - ).sort_values(result_order)[list(left.columns)] + left_df, right_df, how=how, on=predicate, suffixes=("", "_y") + ).sort_values(result_order)[cols] backend.assert_frame_equal(result, expected, check_like=True) diff --git a/ibis/backends/tests/test_set_ops.py b/ibis/backends/tests/test_set_ops.py index 60391f983525..64467b067012 100644 --- a/ibis/backends/tests/test_set_ops.py +++ b/ibis/backends/tests/test_set_ops.py @@ -16,7 +16,7 @@ @pytest.fixture def union_subsets(alltypes, df): - cols_a, cols_b, cols_c = (alltypes.columns.copy() for _ in range(3)) + cols_a, cols_b, cols_c = (list(alltypes.columns) for _ in range(3)) random.seed(89) random.shuffle(cols_a) diff --git a/ibis/expr/types/dataframe_interchange.py b/ibis/expr/types/dataframe_interchange.py index 7be679265993..0bca5ea65780 100644 --- a/ibis/expr/types/dataframe_interchange.py +++ b/ibis/expr/types/dataframe_interchange.py @@ -68,7 +68,7 @@ def num_columns(self): return len(self._table.columns) def column_names(self): - return self._table.columns + return list(self._table.columns) def get_column(self, i: int) -> IbisColumn: name = self._table.columns[i] diff --git a/ibis/expr/types/relations.py b/ibis/expr/types/relations.py index 3891c1e69a76..0a6dd1f364be 100644 --- a/ibis/expr/types/relations.py +++ b/ibis/expr/types/relations.py @@ -413,7 +413,7 @@ def cast(self, schema: SchemaLike) -> Table: Columns not present in the input schema will be passed through unchanged >>> t.columns - ['species', 'island', 'bill_length_mm', 'bill_depth_mm', 'flipper_length_mm', 'body_mass_g', 'sex', 'year'] + ('species', 'island', 'bill_length_mm', 'bill_depth_mm', 'flipper_length_mm', 'body_mass_g', 'sex', 'year') >>> expr = t.cast({"body_mass_g": "float64", "bill_length_mm": "int"}) >>> expr.select(*cols).head() ┏━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┓ @@ -3263,7 +3263,7 @@ def cross_join( │ … │ … │ … │ … │ … │ … │ └─────────┴───────────┴────────────────┴───────────────┴───────────────────┴───┘ >>> expr.columns - ['species', + ('species', 'island', 'bill_length_mm', 'bill_depth_mm', @@ -3274,7 +3274,7 @@ def cross_join( 'bill_length_mm_right', 'bill_depth_mm_right', 'flipper_length_mm_right', - 'body_mass_g_right'] + 'body_mass_g_right') >>> expr.count() ┌─────┐ │ 344 │ diff --git a/ibis/selectors.py b/ibis/selectors.py index 3e0acf0da4bd..ea818e63e7e1 100644 --- a/ibis/selectors.py +++ b/ibis/selectors.py @@ -19,14 +19,14 @@ >>> t = ibis.table(dict(a="int", b="string", c="array", abcd="float")) >>> expr = t.select([t[c] for c in t.columns if t[c].type().is_numeric()]) >>> expr.columns -['a', 'abcd'] +('a', 'abcd') Compare that to the [`numeric`](#ibis.selectors.numeric) selector: >>> import ibis.selectors as s >>> expr = t.select(s.numeric()) >>> expr.columns -['a', 'abcd'] +('a', 'abcd') When there are multiple properties to check it gets worse: @@ -39,13 +39,13 @@ ... ] ... ) >>> expr.columns -['a', 'b', 'abcd'] +('a', 'b', 'abcd') Using a composition of selectors this is much less tiresome: >>> expr = t.select((s.numeric() | s.of_type("string")) & s.contains(("a", "b", "cd"))) >>> expr.columns -['a', 'b', 'abcd'] +('a', 'b', 'abcd') """ from __future__ import annotations @@ -112,7 +112,7 @@ def where(predicate: Callable[[ir.Value], bool]) -> Selector: >>> t = ibis.table(dict(a="float32"), name="t") >>> expr = t.select(s.where(lambda col: col.get_name() == "a")) >>> expr.columns - ['a'] + ('a',) """ return Where(predicate) @@ -128,10 +128,10 @@ def numeric() -> Selector: >>> import ibis.selectors as s >>> t = ibis.table(dict(a="int", b="string", c="array"), name="t") >>> t.columns - ['a', 'b', 'c'] + ('a', 'b', 'c') >>> expr = t.select(s.numeric()) # `a` has integer type, so it's numeric >>> expr.columns - ['a'] + ('a',) See Also -------- @@ -168,13 +168,13 @@ def of_type(dtype: dt.DataType | str | type[dt.DataType]) -> Selector: >>> t = ibis.table(dict(name="string", siblings="array", parents="array")) >>> expr = t.select(s.of_type(dt.Array(dt.string))) >>> expr.columns - ['siblings'] + ('siblings',) Strings are also accepted >>> expr = t.select(s.of_type("array")) >>> expr.columns - ['siblings'] + ('siblings',) Abstract/unparametrized types may also be specified by their string name (e.g. "integer" for any integer type), or by passing in a `DataType` class @@ -185,7 +185,7 @@ def of_type(dtype: dt.DataType | str | type[dt.DataType]) -> Selector: >>> expr1.equals(expr2) True >>> expr2.columns - ['siblings', 'parents'] + ('siblings', 'parents') See Also -------- @@ -247,7 +247,7 @@ def startswith(prefixes: str | tuple[str, ...]) -> Selector: >>> t = ibis.table(dict(apples="int", oranges="float", bananas="bool"), name="t") >>> expr = t.select(s.startswith(("a", "b"))) >>> expr.columns - ['apples', 'bananas'] + ('apples', 'bananas') See Also -------- @@ -319,14 +319,14 @@ def contains( ... ) >>> expr = t.select(s.contains(("a", "b"))) >>> expr.columns - ['a', 'b', 'ab'] + ('a', 'b', 'ab') Select columns that contain all of `"a"` and `"b"`, that is, both `"a"` and `"b"` must be in each column's name to match. >>> expr = t.select(s.contains(("a", "b"), how=all)) >>> expr.columns - ['ab'] + ('ab',) See Also -------- @@ -359,7 +359,7 @@ def matches(regex: str | re.Pattern) -> Selector: >>> t = ibis.table(dict(ab="string", abd="int", be="array")) >>> expr = t.select(s.matches(r"ab+")) >>> expr.columns - ['ab', 'abd'] + ('ab', 'abd') See Also -------- @@ -410,7 +410,7 @@ def cols(*names: str | ir.Column) -> Selector: >>> t = ibis.table({"a": "int", "b": "int", "c": "int"}) >>> expr = t.select(s.cols("a", "b")) >>> expr.columns - ['a', 'b'] + ('a', 'b') See Also -------- diff --git a/ibis/tests/expr/test_analytics.py b/ibis/tests/expr/test_analytics.py index 348d04f1f01a..007c4e945efb 100644 --- a/ibis/tests/expr/test_analytics.py +++ b/ibis/tests/expr/test_analytics.py @@ -116,10 +116,10 @@ def test_topk_function_late_bind(airlines): def test_topk_name(airlines): expr1 = airlines.dest.topk(5, name="mycol") expr2 = airlines.dest.topk(5, by=_.count().name("mycol")) - assert expr1.columns == ["dest", "mycol"] + assert expr1.columns == ("dest", "mycol") assert_equal(expr1, expr2) expr3 = airlines.dest.topk(5, by=_.arrdelay.mean(), name="mycol") expr4 = airlines.dest.topk(5, by=_.arrdelay.mean().name("mycol")) - assert expr3.columns == ["dest", "mycol"] + assert expr3.columns == ("dest", "mycol") assert_equal(expr3, expr4) diff --git a/ibis/tests/expr/test_table.py b/ibis/tests/expr/test_table.py index 329975dbdc41..2b117cbb93ab 100644 --- a/ibis/tests/expr/test_table.py +++ b/ibis/tests/expr/test_table.py @@ -874,18 +874,18 @@ def test_group_by_column_select_api(table): def test_value_counts(table): expr1 = table.g.value_counts() expr2 = table[["g"]].group_by("g").aggregate(g_count=_.count()) - assert expr1.columns == ["g", "g_count"] + assert expr1.columns == ("g", "g_count") assert_equal(expr1, expr2) expr3 = table.g.value_counts(name="freq") expr4 = table[["g"]].group_by("g").aggregate(freq=_.count()) - assert expr3.columns == ["g", "freq"] + assert expr3.columns == ("g", "freq") assert_equal(expr3, expr4) def test_value_counts_on_window_function(table): expr = (table.a - table.a.mean()).name("x").value_counts(name="count") - assert expr.columns == ["x", "count"] + assert expr.columns == ("x", "count") def test_value_counts_unnamed_expr(con):