diff --git a/docs/how-to/extending/builtin.qmd b/docs/how-to/extending/builtin.qmd
index 3d3e0f9578b9..0fbb488a8bd2 100644
--- a/docs/how-to/extending/builtin.qmd
+++ b/docs/how-to/extending/builtin.qmd
@@ -79,7 +79,7 @@ rest of the library:
 pkgs = ibis.read_parquet(
     "https://storage.googleapis.com/ibis-tutorial-data/pypi/2024-04-24/packages.parquet"
 )
-pandas_ish = pkgs[jw_sim(pkgs.name, "pandas") >= 0.9]
+pandas_ish = pkgs.filter(jw_sim(pkgs.name, "pandas") >= 0.9)
 pandas_ish
 ```
 
diff --git a/docs/tutorials/ibis-for-pandas-users.qmd b/docs/tutorials/ibis-for-pandas-users.qmd
index e0fc2f5908e5..8001f45386f1 100644
--- a/docs/tutorials/ibis-for-pandas-users.qmd
+++ b/docs/tutorials/ibis-for-pandas-users.qmd
@@ -126,13 +126,6 @@ Selecting columns is very similar to in pandas. In fact, you can use the same sy
 t[["one", "two"]]
 ```
 
-However, since row-level indexing is not supported in Ibis, the inner list is not necessary.
-
-
-```{python}
-t["one", "two"]
-```
-
 ## Selecting columns
 
 Selecting columns is done using the same syntax as in pandas `DataFrames`. You can use either
@@ -192,11 +185,11 @@ new_col = unnamed.name("new_col")
 new_col
 ```
 
-You can then add this column to the table using a projection.
+You can then add this column to the table using `mutate`:
 
 
 ```{python}
-proj = t["one", "two", new_col]
+proj = t.mutate(new_col)
 proj
 ```
 
@@ -301,10 +294,9 @@ penguins.limit(5)
 ### Filtering rows
 
 In addition to limiting the number of rows that are returned, it is possible to
-filter the rows using expressions. Expressions are constructed very similarly to
-the way they are in pandas. Ibis expressions are constructed from operations on
-columns in a table which return a boolean result. This result is then used to
-filter the table.
+filter the rows using expressions. This is done using the `filter` method in
+Ibis. Ibis expressions are constructed from operations on columns in a table
+which return a boolean result. This result is then used to filter the table.
 
 
 ```{python}
@@ -324,7 +316,7 @@ get 6 rows back.
 
 
 ```{python}
-filtered = penguins[expr]
+filtered = penguins.filter(expr)
 filtered
 ```
 
@@ -332,24 +324,22 @@ Of course, the filtering expression can be applied inline as well.
 
 
 ```{python}
-filtered = penguins[penguins.bill_length_mm > 37.0]
+filtered = penguins.filter(penguins.bill_length_mm > 37.0)
 filtered
 ```
 
-Multiple filtering expressions can be combined into a single expression or chained onto existing
-table expressions.
+Multiple filtering expressions may be passed to a single call (keeping only
+rows where all of them are true), or combined using common boolean operators
+like `&` and `|`. The expressions below are equivalent:
 
 
 ```{python}
-filtered = penguins[(penguins.bill_length_mm > 37.0) & (penguins.bill_depth_mm > 18.0)]
+filtered = penguins.filter(penguins.bill_length_mm > 37.0, penguins.bill_depth_mm > 18.0)
 filtered
 ```
 
-The code above will return the same rows as the code below.
-
-
 ```{python}
-filtered = penguins[penguins.bill_length_mm > 37.0][penguins.bill_depth_mm > 18.0]
+filtered = penguins.filter((penguins.bill_length_mm > 37.0) & (penguins.bill_depth_mm > 18.0))
 filtered
 ```
 
@@ -359,7 +349,7 @@ is greater than the mean.
```{python} -filtered = penguins[penguins.bill_length_mm > penguins.bill_length_mm.mean()] +filtered = penguins.filter(penguins.bill_length_mm > penguins.bill_length_mm.mean()) filtered ``` diff --git a/docs/tutorials/ibis-for-sql-users.qmd b/docs/tutorials/ibis-for-sql-users.qmd index 534090bfce64..cbb9b4974d70 100644 --- a/docs/tutorials/ibis-for-sql-users.qmd +++ b/docs/tutorials/ibis-for-sql-users.qmd @@ -46,12 +46,6 @@ FROM my_data In Ibis, this is -```{python} -proj = t["two", "one"] -``` - -or - ```{python} proj = t.select("two", "one") ``` @@ -78,7 +72,7 @@ new_col = (t.three * 2).name("new_col") Now, we have: ```{python} -proj = t["two", "one", new_col] +proj = t.select("two", "one", new_col) ibis.to_sql(proj) ``` @@ -113,7 +107,7 @@ select all columns in a table using the `SELECT *` construct. To do this, use the table expression itself in a projection: ```{python} -proj = t[t] +proj = t.select(t) ibis.to_sql(proj) ``` @@ -121,7 +115,7 @@ This is how `mutate` is implemented. The example above `t.mutate(new_col=t.three * 2)` can be written as a normal projection: ```{python} -proj = t[t, new_col] +proj = t.select(t, new_col) ibis.to_sql(proj) ``` @@ -144,7 +138,7 @@ To write this with Ibis, it is: ```{python} diff = (t.two - t2.value).name("diff") -joined = t.join(t2, t.one == t2.key)[t, diff] +joined = t.join(t2, t.one == t2.key).select(t, diff) ``` And verify the generated SQL: @@ -188,19 +182,18 @@ ibis.to_sql(expr) ## Filtering / `WHERE` -You can add filter clauses to a table expression either by indexing with -`[]` (similar to pandas) or use the `filter` method: +You can add filter clauses to a table expression by using the `filter` method: ```{python} -filtered = t[t.two > 0] +filtered = t.filter(t.two > 0) ibis.to_sql(filtered) ``` -`filter` can take a list of expressions, which must all be satisfied for +`filter` can take multiple expressions, which must all be satisfied for a row to appear in the result: ```{python} -filtered = t.filter([t.two > 0, t.one.isin(["A", "B"])]) +filtered = t.filter(t.two > 0, t.one.isin(["A", "B"])) ibis.to_sql(filtered) ``` @@ -209,7 +202,7 @@ To compose boolean expressions with `AND` or `OR`, use the respective ```{python} cond = (t.two < 0) | ((t.two > 0) | t.one.isin(["A", "B"])) -filtered = t[cond] +filtered = t.filter(cond) ibis.to_sql(filtered) ``` @@ -617,7 +610,7 @@ ibis.to_sql(expr) ```{python} agged = ( - expr[expr.one.notnull()] + expr.filter(expr.one.notnull()) .group_by("is_valid") .aggregate(three_count=lambda t: t.three.notnull().sum()) ) @@ -632,7 +625,7 @@ keyword. 
The result of `between` is boolean and can be used with any other boolean expression: ```{python} -expr = t[t.two.between(10, 50) & t.one.notnull()] +expr = t.filter(t.two.between(10, 50) & t.one.notnull()) ibis.to_sql(expr) ``` @@ -684,7 +677,7 @@ After one or more joins, you can reference any of the joined tables in a projection immediately after: ```{python} -expr = joined[t1, t2.value2] +expr = joined.select(t1, t2.value2) ibis.to_sql(expr) ``` @@ -692,7 +685,7 @@ If you need to compute an expression that involves both tables, you can do that also: ```{python} -expr = joined[t1.key1, (t1.value1 - t2.value2).name("diff")] +expr = joined.select(t1.key1, (t1.value1 - t2.value2).name("diff")) ibis.to_sql(expr) ``` @@ -800,7 +793,7 @@ In these case, we can specify a list of common join keys: ```{python} joined = t4.join(t5, ["key1", "key2", "key3"]) -expr = joined[t4, t5.value2] +expr = joined.select(t4, t5.value2) ibis.to_sql(expr) ``` @@ -808,7 +801,7 @@ You can mix the overlapping key names with other expressions: ```{python} joined = t4.join(t5, ["key1", "key2", t4.key3.left(4) == t5.key3.left(4)]) -expr = joined[t4, t5.value2] +expr = joined.select(t4, t5.value2) ibis.to_sql(expr) ``` @@ -885,7 +878,7 @@ cond = (events.user_id == purchases.user_id).any() This can now be used to filter `events`: ```{python} -expr = events[cond] +expr = events.filter(cond) ibis.to_sql(expr) ``` @@ -893,7 +886,7 @@ If you negate the condition, it will instead give you only event data from user *that have not made a purchase*: ```{python} -expr = events[-cond] +expr = events.filter(-cond) ibis.to_sql(expr) ``` @@ -916,7 +909,7 @@ you can write with Ibis: ```{python} cond = events.user_id.isin(purchases.user_id) -expr = events[cond] +expr = events.filter(cond) ibis.to_sql(expr) ``` @@ -941,7 +934,7 @@ WHERE value1 > ( With Ibis, the code is simpler and more pandas-like: ```{python} -expr = t1[t1.value1 > t2.value2.max()] +expr = t1.filter(t1.value1 > t2.value2.max()) ibis.to_sql(expr) ``` @@ -968,8 +961,8 @@ With Ibis, the code is similar, but you add the correlated filter to the average statistic: ```{python} -stat = t2[t1.key1 == t2.key3].value2.mean() -expr = t1[t1.value1 > stat] +stat = t2.filter(t1.key1 == t2.key3).value2.mean() +expr = t1.filter(t1.value1 > stat) ibis.to_sql(expr) ``` @@ -1118,7 +1111,7 @@ Ibis provides a `row_number()` function that allows you to do this: expr = purchases.mutate( row_number=ibis.row_number().over(group_by=[_.user_id], order_by=_.price) ) -expr = expr[_.row_number < 3] +expr = expr.filter(_.row_number < 3) ``` The output of this is a table with the three most expensive items that each user has purchased @@ -1149,7 +1142,7 @@ Ibis has a set of interval APIs that allow you to do date/time arithmetic. 
For example: ```{python} -expr = events[events.ts > (ibis.now() - ibis.interval(years=1))] +expr = events.filter(events.ts > (ibis.now() - ibis.interval(years=1))) ibis.to_sql(expr) ``` @@ -1214,12 +1207,13 @@ purchases = ibis.table( metric = purchases.amount.sum().name("total") agged = purchases.group_by(["region", "kind"]).aggregate(metric) -left = agged[agged.kind == "foo"] -right = agged[agged.kind == "bar"] +left = agged.filter(agged.kind == "foo") +right = agged.filter(agged.kind == "bar") -result = left.join(right, left.region == right.region)[ - left.region, (left.total - right.total).name("diff") -] +result = ( + left.join(right, left.region == right.region) + .select(left.region, (left.total - right.total).name("diff")) +) ``` Ibis automatically creates a CTE for `agged`: diff --git a/docs/tutorials/open-source-software/apache-flink/1_single_feature.qmd b/docs/tutorials/open-source-software/apache-flink/1_single_feature.qmd index 86747c5d8e00..4fef3e407f6a 100644 --- a/docs/tutorials/open-source-software/apache-flink/1_single_feature.qmd +++ b/docs/tutorials/open-source-software/apache-flink/1_single_feature.qmd @@ -184,7 +184,7 @@ transaction count over the past five hours may be useful features. Let’s write out each of these using Ibis API: ```{python} -user_trans_amt_last_360m_agg = source_table[ +user_trans_amt_last_360m_agg = source_table.select( source_table.user_id, # Calculate the average transaction amount over the past six hours source_table.amt.mean() @@ -207,7 +207,7 @@ user_trans_amt_last_360m_agg = source_table[ ) .name("user_trans_count_last_360min"), source_table.trans_date_trans_time, -] +) ``` `over()` creates an [over diff --git a/ibis/backends/bigquery/tests/system/test_client.py b/ibis/backends/bigquery/tests/system/test_client.py index c31a33bd8694..0ae05872a52e 100644 --- a/ibis/backends/bigquery/tests/system/test_client.py +++ b/ibis/backends/bigquery/tests/system/test_client.py @@ -186,7 +186,7 @@ def test_scalar_param_partition_time(parted_alltypes): assert "PARTITIONTIME" in parted_alltypes.columns assert "PARTITIONTIME" in parted_alltypes.schema() param = ibis.param("timestamp('UTC')") - expr = parted_alltypes[param > parted_alltypes.PARTITIONTIME] + expr = parted_alltypes.filter(param > parted_alltypes.PARTITIONTIME) df = expr.execute(params={param: "2017-01-01"}) assert df.empty @@ -201,7 +201,7 @@ def test_parted_column(con, kind): def test_cross_project_query(public): table = public.table("posts_questions") - expr = table[table.tags.contains("ibis")][["title", "tags"]] + expr = table.filter(table.tags.contains("ibis"))[["title", "tags"]] n = 5 df = expr.limit(n).execute() assert len(df) == n @@ -231,7 +231,7 @@ def test_multiple_project_queries_execute(con): trips = con.table("trips", database="nyc-tlc.yellow").limit(5) predicate = posts_questions.tags == trips.rate_code cols = [posts_questions.title] - join = posts_questions.left_join(trips, predicate)[cols] + join = posts_questions.left_join(trips, predicate).select(cols) result = join.execute() assert list(result.columns) == ["title"] assert len(result) == 5 diff --git a/ibis/backends/bigquery/tests/system/udf/test_udf_execute.py b/ibis/backends/bigquery/tests/system/udf/test_udf_execute.py index 632ba2622792..2587914ba697 100644 --- a/ibis/backends/bigquery/tests/system/udf/test_udf_execute.py +++ b/ibis/backends/bigquery/tests/system/udf/test_udf_execute.py @@ -18,7 +18,7 @@ @pytest.fixture(scope="module") def alltypes(con): t = con.table("functional_alltypes") - expr = 
t[t.bigint_col.isin([10, 20])].limit(10) + expr = t.filter(t.bigint_col.isin([10, 20])).limit(10) return expr diff --git a/ibis/backends/bigquery/tests/unit/test_compiler.py b/ibis/backends/bigquery/tests/unit/test_compiler.py index e058cd214c86..660ee779d800 100644 --- a/ibis/backends/bigquery/tests/unit/test_compiler.py +++ b/ibis/backends/bigquery/tests/unit/test_compiler.py @@ -151,11 +151,11 @@ def test_projection_fusion_only_peeks_at_immediate_parent(snapshot): ("val", "int64"), ] table = ibis.table(schema, name="unbound_table") - table = table[table.PARTITIONTIME < ibis.date("2017-01-01")] + table = table.filter(table.PARTITIONTIME < ibis.date("2017-01-01")) table = table.mutate(file_date=table.file_date.cast("date")) - table = table[table.file_date < ibis.date("2017-01-01")] + table = table.filter(table.file_date < ibis.date("2017-01-01")) table = table.mutate(XYZ=table.val * 2) - expr = table.join(table.view())[table] + expr = table.join(table.view()).select(table) snapshot.assert_match(to_sql(expr), "out.sql") @@ -276,7 +276,7 @@ class MockBackend(ibis.backends.bigquery.Backend): for _ in range(num_joins): # noqa: F402 table = table.mutate(dummy=ibis.literal("")) table_ = table.view() - table = table.left_join(table_, ["dummy"])[[table_]] + table = table.left_join(table_, ["dummy"]).select(table_) start = time.time() table.compile() @@ -417,9 +417,9 @@ def test_divide_by_zero(alltypes, op, snapshot): def test_identical_to(alltypes, snapshot): - expr = alltypes[ + expr = alltypes.filter( _.string_col.identical_to("a") & _.date_string_col.identical_to("b") - ] + ) snapshot.assert_match(to_sql(expr), "out.sql") diff --git a/ibis/backends/clickhouse/tests/test_aggregations.py b/ibis/backends/clickhouse/tests/test_aggregations.py index 6f376d263bab..c9e5bb38c9ad 100644 --- a/ibis/backends/clickhouse/tests/test_aggregations.py +++ b/ibis/backends/clickhouse/tests/test_aggregations.py @@ -163,7 +163,7 @@ def test_boolean_reduction(alltypes, op, df): def test_anonymous_aggregate(alltypes, df): t = alltypes - expr = t[t.double_col > t.double_col.mean()] + expr = t.filter(t.double_col > t.double_col.mean()) result = expr.execute().set_index("id") expected = df[df.double_col > df.double_col.mean()].set_index("id") tm.assert_frame_equal(result, expected, check_like=True) diff --git a/ibis/backends/clickhouse/tests/test_client.py b/ibis/backends/clickhouse/tests/test_client.py index 311889aea091..270b305f3aeb 100644 --- a/ibis/backends/clickhouse/tests/test_client.py +++ b/ibis/backends/clickhouse/tests/test_client.py @@ -129,7 +129,7 @@ def test_sql_query_limits(alltypes): def test_embedded_identifier_quoting(alltypes): t = alltypes - expr = t[[(t.double_col * 2).name("double(fun)")]]["double(fun)"].sum() + expr = t.select((t.double_col * 2).name("double(fun)"))["double(fun)"].sum() expr.execute() diff --git a/ibis/backends/clickhouse/tests/test_functions.py b/ibis/backends/clickhouse/tests/test_functions.py index feb945d59b90..6a8d185e4d34 100644 --- a/ibis/backends/clickhouse/tests/test_functions.py +++ b/ibis/backends/clickhouse/tests/test_functions.py @@ -476,7 +476,7 @@ def my_add(a: int, b: int) -> int: ... 
n = 5 expr = ( - alltypes[alltypes.int_col == 1] + alltypes.filter(alltypes.int_col == 1) .limit(n) .int_col.collect() .map(lambda x: my_add(x, 1)) diff --git a/ibis/backends/clickhouse/tests/test_select.py b/ibis/backends/clickhouse/tests/test_select.py index 3087b15bbdeb..364284191ab0 100644 --- a/ibis/backends/clickhouse/tests/test_select.py +++ b/ibis/backends/clickhouse/tests/test_select.py @@ -38,23 +38,23 @@ def time_right(con): def test_timestamp_extract_field(alltypes, assert_sql): t = alltypes.timestamp_col - expr = alltypes[ + expr = alltypes.select( t.year().name("year"), t.month().name("month"), t.day().name("day"), t.hour().name("hour"), t.minute().name("minute"), t.second().name("second"), - ] + ) assert_sql(expr) def test_isin_notin_in_select(alltypes, assert_sql): values = ["foo", "bar"] - filtered = alltypes[alltypes.string_col.isin(values)] + filtered = alltypes.filter(alltypes.string_col.isin(values)) assert_sql(filtered, "out1.sql") - filtered = alltypes[alltypes.string_col.notin(values)] + filtered = alltypes.filter(alltypes.string_col.notin(values)) assert_sql(filtered, "out2.sql") @@ -100,7 +100,7 @@ def test_simple_scalar_aggregates(alltypes, assert_sql): # Things like table.column.{sum, mean, ...}() table = alltypes - expr = table[table.int_col > 0].float_col.sum() + expr = table.filter(table.int_col > 0).float_col.sum() assert_sql(expr) @@ -152,7 +152,7 @@ def test_simple_scalar_aggregates(alltypes, assert_sql): def test_table_column_unbox(alltypes, assert_sql): m = alltypes.float_col.sum().name("total") - agged = alltypes[alltypes.int_col > 0].group_by("string_col").aggregate([m]) + agged = alltypes.filter(alltypes.int_col > 0).group_by("string_col").aggregate([m]) expr = agged.string_col assert_sql(expr) @@ -213,7 +213,7 @@ def test_simple_joins( ): t1, t2 = batting, awards_players pred = [t1[left_key] == t2[right_key]] - expr = getattr(t1, join_type)(t2, pred)[[t1]] + expr = getattr(t1, join_type)(t2, pred).select(t1) assert_sql(expr) @@ -226,7 +226,7 @@ def test_self_reference_simple(con, alltypes, assert_sql): def test_join_self_reference(con, alltypes, assert_sql): t1 = alltypes t2 = t1.view() - expr = t1.inner_join(t2, ["id"])[[t1]] + expr = t1.inner_join(t2, ["id"]).select(t1) assert_sql(expr) assert len(con.execute(expr)) @@ -261,7 +261,7 @@ def test_filter_predicates(diamonds): expr = diamonds for pred in predicates: - expr = expr[pred(expr)].select(expr) + expr = expr.filter(pred(expr)).select(expr) expr.execute() @@ -305,9 +305,9 @@ def test_join_with_external_table_errors(alltypes): ) alltypes = alltypes.mutate(b=alltypes.tinyint_col) - expr = alltypes.inner_join(external_table, ["b"])[ + expr = alltypes.inner_join(external_table, ["b"]).select( external_table.a, external_table.c, alltypes.id - ] + ) with pytest.raises(cc.driver.exceptions.DatabaseError): expr.execute() @@ -328,9 +328,9 @@ def test_join_with_external_table(alltypes, df): ) alltypes = alltypes.mutate(b=alltypes.tinyint_col) - expr = alltypes.inner_join(external_table, ["b"])[ + expr = alltypes.inner_join(external_table, ["b"]).select( external_table.a, external_table.c, alltypes.id - ] + ) result = expr.execute(external_tables={"external": external_df}) expected = df.assign(b=df.tinyint_col).merge(external_df, on="b")[["a", "c", "id"]] diff --git a/ibis/backends/dask/tests/test_arrays.py b/ibis/backends/dask/tests/test_arrays.py index 107cca5fedaa..39d89ee7a1e0 100644 --- a/ibis/backends/dask/tests/test_arrays.py +++ b/ibis/backends/dask/tests/test_arrays.py @@ -59,7 +59,7 @@ def 
test_array_collect(t, df): def test_array_collect_rolling_partitioned(t, df): window = ibis.trailing_window(1, order_by=t.plain_int64) colexpr = t.plain_float64.collect().over(window) - expr = t["dup_strings", "plain_int64", colexpr.name("collected")] + expr = t.select("dup_strings", "plain_int64", colexpr.name("collected")) result = expr.compile() expected = dd.from_pandas( pd.DataFrame( @@ -134,7 +134,7 @@ def test_array_slice_scalar(client, start, stop): [1, 3, 4, 11, -11], ) def test_array_index(t, df, index): - expr = t[t.array_of_float64[index].name("indexed")] + expr = t.select(t.array_of_float64[index].name("indexed")) result = expr.execute() expected = pd.DataFrame( { diff --git a/ibis/backends/dask/tests/test_join.py b/ibis/backends/dask/tests/test_join.py index 75b1235d5182..9614c00fd598 100644 --- a/ibis/backends/dask/tests/test_join.py +++ b/ibis/backends/dask/tests/test_join.py @@ -30,9 +30,9 @@ @join_type def test_join(how, left, right, df1, df2): - expr = left.join(right, left.key == right.key, how=how)[ + expr = left.join(right, left.key == right.key, how=how).select( left, right.other_value, right.key3 - ] + ) result = expr.compile() expected = dd.merge(df1, df2, how=how, on="key") tm.assert_frame_equal( @@ -43,7 +43,7 @@ def test_join(how, left, right, df1, df2): @join_type def test_join_project_left_table(how, left, right, df1, df2): - expr = left.join(right, left.key == right.key, how=how)[left, right.key3] + expr = left.join(right, left.key == right.key, how=how).select(left, right.key3) result = expr.compile() expected = dd.merge(df1, df2, how=how, on="key")[list(left.columns) + ["key3"]] tm.assert_frame_equal( @@ -81,7 +81,7 @@ def test_join_with_duplicate_non_key_columns(how, left, right, df1, df2): @join_type def test_join_with_post_expression_selection(how, left, right, df1, df2): join = left.join(right, left.key == right.key, how=how) - expr = join[left.key, left.value, right.other_value] + expr = join.select(left.key, left.value, right.other_value) result = expr.compile() expected = dd.merge(df1, df2, on="key", how=how)[["key", "value", "other_value"]] tm.assert_frame_equal( @@ -96,8 +96,8 @@ def test_join_with_post_expression_filter(how, left): rhs = left[["key2", "value"]] joined = lhs.join(rhs, "key2", how=how) - projected = joined[lhs, rhs.value] - expr = projected[projected.value == 4] + projected = joined.select(lhs, rhs.value) + expr = projected.filter(projected.value == 4) result = expr.compile() df1 = lhs.compile() @@ -118,12 +118,12 @@ def test_multi_join_with_post_expression_filter(how, left, df1): rhs2 = left[["key2", "value"]].rename(value2="value") joined = lhs.join(rhs, "key2", how=how) - projected = joined[lhs, rhs.value] - filtered = projected[projected.value == 4] + projected = joined.select(lhs, rhs.value) + filtered = projected.filter(projected.value == 4) joined2 = filtered.join(rhs2, "key2") - projected2 = joined2[filtered.key, rhs2.value2] - expr = projected2[projected2.value2 == 3] + projected2 = joined2.select(filtered.key, rhs2.value2) + expr = projected2.filter(projected2.value2 == 3) result = expr.compile() @@ -145,7 +145,7 @@ def test_multi_join_with_post_expression_filter(how, left, df1): def test_join_with_non_trivial_key(how, left, right, df1, df2): # also test that the order of operands in the predicate doesn't matter join = left.join(right, right.key.length() == left.key.length(), how=how) - expr = join[left.key, left.value, right.other_value] + expr = join.select(left.key, left.value, right.other_value) result = expr.compile() 
expected = ( @@ -168,8 +168,8 @@ def test_join_with_non_trivial_key(how, left, right, df1, df2): def test_join_with_non_trivial_key_project_table(how, left, right, df1, df2): # also test that the order of operands in the predicate doesn't matter join = left.join(right, right.key.length() == left.key.length(), how=how) - expr = join[left, right.other_value] - expr = expr[expr.key.length() == 1] + expr = join.select(left, right.other_value) + expr = expr.filter(expr.key.length() == 1) result = expr.compile() expected = ( @@ -194,7 +194,7 @@ def test_join_with_project_right_duplicate_column(client, how, left, df1, df3): # also test that the order of operands in the predicate doesn't matter right = client.table("df3") join = left.join(right, ["key"], how=how) - expr = join[left.key, right.key2, right.other_value] + expr = join.select(left.key, right.key2, right.other_value) result = expr.compile() expected = ( @@ -216,7 +216,9 @@ def test_join_with_project_right_duplicate_column(client, how, left, df1, df3): @merge_asof_minversion def test_asof_join(time_left, time_right, time_df1, time_df2): - expr = time_left.asof_join(time_right, "time")[time_left, time_right.other_value] + expr = time_left.asof_join(time_right, "time").select( + time_left, time_right.other_value + ) result = expr.compile() expected = dd.merge_asof(time_df1, time_df2, on="time") tm.assert_frame_equal( @@ -229,9 +231,9 @@ def test_asof_join(time_left, time_right, time_df1, time_df2): def test_keyed_asof_join( time_keyed_left, time_keyed_right, time_keyed_df1, time_keyed_df2 ): - expr = time_keyed_left.asof_join(time_keyed_right, "time", predicates="key")[ + expr = time_keyed_left.asof_join(time_keyed_right, "time", predicates="key").select( time_keyed_left, time_keyed_right.other_value - ] + ) result = expr.compile() expected = dd.merge_asof(time_keyed_df1, time_keyed_df2, on="time", by="key") tm.assert_frame_equal( diff --git a/ibis/backends/dask/tests/test_operations.py b/ibis/backends/dask/tests/test_operations.py index cf6bd9a9eb04..d1979bec7149 100644 --- a/ibis/backends/dask/tests/test_operations.py +++ b/ibis/backends/dask/tests/test_operations.py @@ -32,7 +32,9 @@ def test_literal(client): def test_selection(t, df): - expr = t[((t.plain_strings == "a") | (t.plain_int64 == 3)) & (t.dup_strings == "d")] + expr = t.filter( + ((t.plain_strings == "a") | (t.plain_int64 == 3)) & (t.dup_strings == "d") + ) result = expr.compile() expected = df[ ((df.plain_strings == "a") | (df.plain_int64 == 3)) & (df.dup_strings == "d") @@ -56,12 +58,10 @@ def test_mutate(t, df): @pytest.mark.xfail(reason="TODO - windowing - #2553") def test_project_scope_does_not_override(t, df): col = t.plain_int64 - expr = t[ - [ - col.name("new_col"), - col.sum().over(ibis.window(group_by="dup_strings")).name("grouped"), - ] - ] + expr = t.select( + col.name("new_col"), + col.sum().over(ibis.window(group_by="dup_strings")).name("grouped"), + ) result = expr.compile() expected = dd.concat( [ @@ -402,7 +402,7 @@ def test_nullif_inf(con): def test_group_concat(t, df): expr = ( - t[t.dup_ints == 1] + t.filter(t.dup_ints == 1) .group_by(t.dup_strings) .aggregate(foo=t.dup_ints.group_concat(",")) ) diff --git a/ibis/backends/dask/tests/test_window.py b/ibis/backends/dask/tests/test_window.py index c8c116170300..f810215e53a6 100644 --- a/ibis/backends/dask/tests/test_window.py +++ b/ibis/backends/dask/tests/test_window.py @@ -161,7 +161,7 @@ def test_players(players, players_df): def test_batting_filter_mean(batting, batting_df): - expr = batting[batting.G > 
batting.G.mean()] + expr = batting.filter(batting.G > batting.G.mean()) result = expr.execute() expected = ( batting_df[batting_df.G > batting_df.G.mean()].reset_index(drop=True).compute() @@ -348,7 +348,7 @@ def test_mutate_with_window_after_join(con, sort_kind): right = ibis.memtable(right_df) joined = left.outer_join(right, left.ints == right.group) - proj = joined[left, right.value] + proj = joined.select(left, right.value) expr = proj.group_by("ints").mutate(sum=proj.value.sum()) result = con.execute(expr) expected = pd.DataFrame( @@ -380,7 +380,7 @@ def test_mutate_scalar_with_window_after_join(npartitions): left, right = map(con.table, ("left", "right")) joined = left.outer_join(right, left.ints == right.group) - proj = joined[left, right.value] + proj = joined.select(left, right.value) expr = proj.mutate(sum=proj.value.sum(), const=ibis.literal(1)) result = expr.execute() result = result.sort_values(["ints", "value"]).reset_index(drop=True) @@ -415,8 +415,8 @@ def test_project_scalar_after_join(npartitions): left, right = map(con.table, ("left", "right")) joined = left.outer_join(right, left.ints == right.group) - proj = joined[left, right.value] - expr = proj[proj.value.sum().name("sum"), ibis.literal(1).name("const")] + proj = joined.select(left, right.value) + expr = proj.select(proj.value.sum().name("sum"), ibis.literal(1).name("const")) result = expr.execute().reset_index(drop=True) expected = pd.DataFrame( { diff --git a/ibis/backends/flink/tests/test_compiler.py b/ibis/backends/flink/tests/test_compiler.py index a39e0629ef6f..ed7ea10773ba 100644 --- a/ibis/backends/flink/tests/test_compiler.py +++ b/ibis/backends/flink/tests/test_compiler.py @@ -37,9 +37,9 @@ def test_complex_projections(simple_table, assert_sql): def test_filter(simple_table, assert_sql): - expr = simple_table[ + expr = simple_table.filter( ((simple_table.c > 0) | (simple_table.c < 0)) & simple_table.g.isin(["A", "B"]) - ] + ) assert_sql(expr) diff --git a/ibis/backends/impala/tests/test_bucket_histogram.py b/ibis/backends/impala/tests/test_bucket_histogram.py index af1ca0591a14..344b6ec99da5 100644 --- a/ibis/backends/impala/tests/test_bucket_histogram.py +++ b/ibis/backends/impala/tests/test_bucket_histogram.py @@ -84,6 +84,6 @@ def test_bucket_assign_labels(table, snapshot): labelled = size.tier.label( ["Under 0", "0 to 10", "10 to 25", "25 to 50"], nulls="error" ).name("tier2") - expr = size[labelled, size[1]] + expr = size.select(labelled, size[1]) snapshot.assert_match(translate(expr), "out.sql") diff --git a/ibis/backends/impala/tests/test_client.py b/ibis/backends/impala/tests/test_client.py index 680d6110c1c0..212aaf3b98a4 100644 --- a/ibis/backends/impala/tests/test_client.py +++ b/ibis/backends/impala/tests/test_client.py @@ -88,16 +88,18 @@ def test_adapt_scalar_array_results(con, alltypes): def test_interactive_repr_call_failure(con): t = con.table("lineitem").limit(100000) - t = t[t, t.l_receiptdate.cast("timestamp").name("date")] + t = t.select(t, t.l_receiptdate.cast("timestamp").name("date")) keys = [t.date.year().name("year"), "l_linestatus"] filt = t.l_linestatus.isin(["F"]) - expr = t[filt].group_by(keys).aggregate(t.l_extendedprice.mean().name("avg_px")) + expr = ( + t.filter(filt).group_by(keys).aggregate(t.l_extendedprice.mean().name("avg_px")) + ) w2 = ibis.trailing_window(9, group_by=expr.l_linestatus, order_by=expr.year) metric = expr["avg_px"].mean().over(w2) - enriched = expr[expr, metric] + enriched = expr.select(expr, metric) with config.option_context("interactive", True): 
repr(enriched) diff --git a/ibis/backends/impala/tests/test_ddl.py b/ibis/backends/impala/tests/test_ddl.py index 71273d06624a..fd12a69a0be9 100644 --- a/ibis/backends/impala/tests/test_ddl.py +++ b/ibis/backends/impala/tests/test_ddl.py @@ -159,19 +159,21 @@ def test_insert_validate_types(con, alltypes, test_data_db, temp_table): t = con.table(temp_table, database=db) - to_insert = expr[ + to_insert = expr.select( expr.tinyint_col, expr.smallint_col.name("int_col"), expr.string_col - ] + ) t.insert(to_insert.limit(10)) - to_insert = expr[ + to_insert = expr.select( expr.tinyint_col, expr.smallint_col.cast("int32").name("int_col"), expr.string_col, - ] + ) t.insert(to_insert.limit(10)) - to_insert = expr[expr.tinyint_col, expr.bigint_col.name("int_col"), expr.string_col] + to_insert = expr.select( + expr.tinyint_col, expr.bigint_col.name("int_col"), expr.string_col + ) limit_expr = to_insert.limit(10) with pytest.raises(com.IbisError): @@ -296,7 +298,7 @@ def test_query_delimited_file_directory(con, test_data_dir, temp_table): table = con.delimited_file(hdfs_path, schema, name=temp_table, delimiter=",") expr = ( - table[table.bar > 0] + table.filter(table.bar > 0) .group_by("foo") .aggregate( [ diff --git a/ibis/backends/impala/tests/test_ddl_compilation.py b/ibis/backends/impala/tests/test_ddl_compilation.py index 929075d92aa6..d6f386ec0cbe 100644 --- a/ibis/backends/impala/tests/test_ddl_compilation.py +++ b/ibis/backends/impala/tests/test_ddl_compilation.py @@ -168,7 +168,7 @@ def _get_ddl_string(props): @pytest.fixture def expr(t): - return t[t.bigint_col > 0] + return t.filter(t.bigint_col > 0) def test_create_external_table_as(mockcon, snapshot): diff --git a/ibis/backends/impala/tests/test_exprs.py b/ibis/backends/impala/tests/test_exprs.py index 5b2557dd4fdd..7472e4973448 100644 --- a/ibis/backends/impala/tests/test_exprs.py +++ b/ibis/backends/impala/tests/test_exprs.py @@ -17,7 +17,7 @@ def test_embedded_identifier_quoting(alltypes): t = alltypes - expr = t[[(t.double_col * 2).name("double(fun)")]]["double(fun)"].sum() + expr = t.select((t.double_col * 2).name("double(fun)"))["double(fun)"].sum() expr.execute() @@ -134,7 +134,7 @@ def test_builtins(con, alltypes): proj_exprs = [expr.name("e%d" % i) for i, expr in enumerate(exprs)] - projection = table[proj_exprs] + projection = table.select(proj_exprs) projection.limit(10).execute() _check_impala_output_types_match(con, projection) @@ -352,7 +352,7 @@ def test_filter_predicates(con): expr = t for pred in predicates: - expr = expr[pred(expr)].select(expr) + expr = expr.filter(pred(expr)).select(expr) expr.execute() @@ -420,7 +420,7 @@ def test_decimal_timestamp_builtins(con): proj_exprs = [expr.name("e%d" % i) for i, expr in enumerate(exprs)] - projection = table[proj_exprs].limit(10) + projection = table.select(proj_exprs).limit(10) projection.execute() @@ -520,7 +520,7 @@ def test_analytic_functions(alltypes): def test_anti_join_self_reference_works(con, alltypes): t = alltypes.limit(100) t2 = t.view() - case = t[-((t.string_col == t2.string_col).any())] + case = t.filter(~((t.string_col == t2.string_col).any())) con.explain(case) @@ -540,7 +540,8 @@ def test_tpch_self_join_failure(con): joined_all = ( region.join(nation, region.r_regionkey == nation.n_regionkey) .join(customer, customer.c_nationkey == nation.n_nationkey) - .join(orders, orders.o_custkey == customer.c_custkey)[fields_of_interest] + .join(orders, orders.o_custkey == customer.c_custkey) + .select(fields_of_interest) ) year = joined_all.odate.year().name("year") @@ 
-554,7 +555,7 @@ def test_tpch_self_join_failure(con): yoy = current.join( prior, ((current.region == prior.region) & (current.year == (prior.year - 1))), - )[current.region, current.year, yoy_change] + ).select(current.region, current.year, yoy_change) # no analysis failure con.explain(yoy) @@ -577,14 +578,15 @@ def test_tpch_correlated_subquery_failure(con): tpch = ( region.join(nation, region.r_regionkey == nation.n_regionkey) .join(customer, customer.c_nationkey == nation.n_nationkey) - .join(orders, orders.o_custkey == customer.c_custkey)[fields_of_interest] + .join(orders, orders.o_custkey == customer.c_custkey) + .select(fields_of_interest) ) t2 = tpch.view() - conditional_avg = t2[(t2.region == tpch.region)].amount.mean() + conditional_avg = t2.filter(t2.region == tpch.region).amount.mean() amount_filter = tpch.amount > conditional_avg - expr = tpch[amount_filter].limit(0) + expr = tpch.filter(amount_filter).limit(0) # impala can't plan this because its correlated subquery implementation is # broken: it cannot detect the outer reference inside the inner query @@ -622,7 +624,7 @@ def test_unions_with_ctes(con, alltypes): ) expr2 = expr1.view() - join1 = expr1.join(expr2, expr1.string_col == expr2.string_col)[[expr1]] + join1 = expr1.join(expr2, expr1.string_col == expr2.string_col).select(expr1) join2 = join1.view() expr = join1.union(join2) @@ -665,12 +667,12 @@ def test_where_with_timestamp(snapshot): def test_filter_with_analytic(snapshot): x = ibis.table(ibis.schema([("col", "int32")]), "x") - with_filter_col = x[x.columns + [ibis.null().name("filter")]] - filtered = with_filter_col[with_filter_col["filter"].isnull()] - subquery = filtered[filtered.columns] + with_filter_col = x.select(x.columns + [ibis.null().name("filter")]) + filtered = with_filter_col.filter(with_filter_col["filter"].isnull()) + subquery = filtered.select(filtered.columns) - with_analytic = subquery[["col", subquery.count().name("analytic")]] - expr = with_analytic[with_analytic.columns] + with_analytic = subquery.select("col", subquery.count().name("analytic")) + expr = with_analytic.select(with_analytic.columns) snapshot.assert_match(ibis.impala.compile(expr), "out.sql") diff --git a/ibis/backends/impala/tests/test_in_not_in.py b/ibis/backends/impala/tests/test_in_not_in.py index ceeb3aebe002..c3f65230b7e5 100644 --- a/ibis/backends/impala/tests/test_in_not_in.py +++ b/ibis/backends/impala/tests/test_in_not_in.py @@ -33,6 +33,6 @@ def test_literal_in_fields(table, method_name, snapshot): def test_isin_notin_in_select(table, method_name, snapshot): values = ["foo", "bar"] method = getattr(table.g, method_name) - filtered = table[method(values)] + filtered = table.filter(method(values)) result = translate(filtered) snapshot.assert_match(result, "out.sql") diff --git a/ibis/backends/impala/tests/test_partition.py b/ibis/backends/impala/tests/test_partition.py index a44ff8921364..52fe8a9b8bb5 100644 --- a/ibis/backends/impala/tests/test_partition.py +++ b/ibis/backends/impala/tests/test_partition.py @@ -111,7 +111,9 @@ def test_insert_select_partitioned_table(con, df, temp_table, unpart_t): unique_keys = df[part_keys].drop_duplicates() for i, (year, month) in enumerate(unique_keys.itertuples(index=False)): - select_stmt = unpart_t[(unpart_t.year == year) & (unpart_t.month == month)] + select_stmt = unpart_t.filter( + (unpart_t.year == year) & (unpart_t.month == month) + ) # test both styles of insert if i: @@ -132,7 +134,7 @@ def tmp_parted(con): def test_create_partitioned_table_from_expr(con, alltypes, 
tmp_parted): t = alltypes - expr = t[t.id <= 10][["id", "double_col", "month", "year"]] + expr = t.filter(t.id <= 10)[["id", "double_col", "month", "year"]] name = tmp_parted con.create_table(name, expr, partition=[t.year]) new = con.table(name) diff --git a/ibis/backends/impala/tests/test_sql.py b/ibis/backends/impala/tests/test_sql.py index 65c125a8f457..a5f72375542e 100644 --- a/ibis/backends/impala/tests/test_sql.py +++ b/ibis/backends/impala/tests/test_sql.py @@ -28,7 +28,7 @@ def test_join_no_predicates_for_impala(con, join_type, snapshot): t1 = con.table("star1") t2 = con.table("star2") - joined = getattr(t1, join_type)(t2)[[t1]] + joined = getattr(t1, join_type)(t2).select(t1) result = ibis.to_sql(joined, dialect="impala") snapshot.assert_match(result, "out.sql") @@ -76,8 +76,8 @@ def test_nested_join_multiple_ctes(snapshot): movies = ibis.table(dict(movieid="int64", title="string"), name="movies") expr = ratings.timestamp.cast("timestamp") - ratings2 = ratings["userid", "movieid", "rating", expr.name("datetime")] - joined2 = ratings2.join(movies, ["movieid"])[ratings2, movies["title"]] + ratings2 = ratings.select("userid", "movieid", "rating", expr.name("datetime")) + joined2 = ratings2.join(movies, ["movieid"]).select(ratings2, movies["title"]) joined3 = joined2.filter([joined2.userid == 118205, joined2.datetime.year() > 2001]) top_user_old_movie_ids = joined3.filter( [joined3.userid == 118205, joined3.datetime.year() < 2009] @@ -85,7 +85,7 @@ def test_nested_join_multiple_ctes(snapshot): # projection from a filter was hiding an insidious bug, so we're disabling # that for now see issue #1295 cond = joined3.movieid.isin(top_user_old_movie_ids.movieid) - result = joined3[cond] + result = joined3.filter(cond) compiled_result = ibis.to_sql(result, dialect="impala") snapshot.assert_match(compiled_result, "out.sql") @@ -109,7 +109,7 @@ def test_join_with_nested_or_condition(snapshot): t2 = t1.view() joined = t1.join(t2, [t1.a == t2.a, (t1.a != t2.b) | (t1.b != t2.a)]) - expr = joined[t1] + expr = joined.select(t1) result = ibis.to_sql(expr, dialect="impala") snapshot.assert_match(result, "out.sql") @@ -119,7 +119,7 @@ def test_join_with_nested_xor_condition(snapshot): t2 = t1.view() joined = t1.join(t2, [t1.a == t2.a, (t1.a != t2.b) ^ (t1.b != t2.a)]) - expr = joined[t1] + expr = joined.select(t1) result = ibis.to_sql(expr, dialect="impala") snapshot.assert_match(result, "out.sql") @@ -128,7 +128,7 @@ def test_join_with_nested_xor_condition(snapshot): def test_is_parens(method, snapshot): t = ibis.table([("a", "string"), ("b", "string")], "table") func = operator.methodcaller(method) - expr = t[func(t.a) == func(t.b)] + expr = t.filter(func(t.a) == func(t.b)) result = ibis.to_sql(expr, dialect="impala") snapshot.assert_match(result, "out.sql") @@ -136,7 +136,7 @@ def test_is_parens(method, snapshot): def test_is_parens_identical_to(snapshot): t = ibis.table([("a", "string"), ("b", "string")], "table") - expr = t[t.a.identical_to(None) == t.b.identical_to(None)] + expr = t.filter(t.a.identical_to(None) == t.b.identical_to(None)) result = ibis.to_sql(expr, dialect="impala") snapshot.assert_match(result, "out.sql") @@ -147,37 +147,37 @@ def test_join_aliasing(snapshot): [("a", "int64"), ("b", "int64"), ("c", "int64")], name="test_table" ) test = test.mutate(d=test.a + 20) - test2 = test[test.d, test.c] + test2 = test.select(test.d, test.c) idx = (test2.d / 15).cast("int64").name("idx") test3 = test2.group_by([test2.d, idx, test2.c]).aggregate(row_count=test2.count()) test3_totals = 
test3.group_by(test3.d).aggregate(total=test3.row_count.sum()) - test4 = test3.join(test3_totals, test3.d == test3_totals.d)[ + test4 = test3.join(test3_totals, test3.d == test3_totals.d).select( test3, test3_totals.total - ] - test5 = test4[test4.row_count < test4.total / 2] + ) + test5 = test4.filter(test4.row_count < test4.total / 2) agg = ( test.group_by([test.d, test.b]) .aggregate(count=test.count(), unique=test.c.nunique()) .view() ) - result = agg.join(test5, agg.d == test5.d)[agg, test5.total] + result = agg.join(test5, agg.d == test5.d).select(agg, test5.total) result = ibis.to_sql(result, dialect="impala") snapshot.assert_match(result, "out.sql") def test_multiple_filters(snapshot): t = ibis.table([("a", "int64"), ("b", "string")], name="t0") - filt = t[t.a < 100] - expr = filt[filt.a == filt.a.max()] + filt = t.filter(t.a < 100) + expr = filt.filter(filt.a == filt.a.max()) result = ibis.to_sql(expr, dialect="impala") snapshot.assert_match(result, "out.sql") def test_multiple_filters2(snapshot): t = ibis.table([("a", "int64"), ("b", "string")], name="t0") - filt = t[t.a < 100] - expr = filt[filt.a == filt.a.max()] - expr = expr[expr.b == "a"] + filt = t.filter(t.a < 100) + expr = filt.filter(filt.a == filt.a.max()) + expr = expr.filter(expr.b == "a") result = ibis.to_sql(expr, dialect="impala") snapshot.assert_match(result, "out.sql") @@ -250,7 +250,8 @@ def tpch(region, nation, customer, orders): return ( region.join(nation, region.r_regionkey == nation.n_regionkey) .join(customer, customer.c_nationkey == nation.n_nationkey) - .join(orders, orders.o_custkey == customer.c_custkey)[fields_of_interest] + .join(orders, orders.o_custkey == customer.c_custkey) + .select(fields_of_interest) ) @@ -259,18 +260,20 @@ def test_join_key_name(tpch, snapshot): pre_sizes = tpch.group_by(year).size() t2 = tpch.view() - conditional_avg = t2[t2.region == tpch.region].o_totalprice.mean().name("mean") + conditional_avg = ( + t2.filter(t2.region == tpch.region).o_totalprice.mean().name("mean") + ) amount_filter = tpch.o_totalprice > conditional_avg - post_sizes = tpch[amount_filter].group_by(year).size() + post_sizes = tpch.filter(amount_filter).group_by(year).size() percent = (post_sizes[1] / pre_sizes[1].cast("double")).name("fraction") - expr = pre_sizes.join(post_sizes, pre_sizes.year == post_sizes.year)[ + expr = pre_sizes.join(post_sizes, pre_sizes.year == post_sizes.year).select( pre_sizes.year, pre_sizes[1].name("pre_count"), post_sizes[1].name("post_count"), percent, - ] + ) result = ibis.impala.compile(expr) snapshot.assert_match(result, "out.sql") @@ -281,11 +284,11 @@ def test_join_key_name2(tpch, snapshot): pre_sizes = tpch.group_by(year).size() post_sizes = tpch.group_by(year).size().view() - expr = pre_sizes.join(post_sizes, pre_sizes.year == post_sizes.year)[ + expr = pre_sizes.join(post_sizes, pre_sizes.year == post_sizes.year).select( pre_sizes.year, pre_sizes[1].name("pre_count"), post_sizes[1].name("post_count"), - ] + ) result = ibis.impala.compile(expr) snapshot.assert_match(result, "out.sql") diff --git a/ibis/backends/impala/tests/test_value_exprs.py b/ibis/backends/impala/tests/test_value_exprs.py index becef1317143..f590e87df5bb 100644 --- a/ibis/backends/impala/tests/test_value_exprs.py +++ b/ibis/backends/impala/tests/test_value_exprs.py @@ -175,11 +175,11 @@ def test_timestamp_extract_field(table, field, snapshot): def test_sql_extract(table, snapshot): # integration with SQL translation - expr = table[ + expr = table.select( table.i.year().name("year"), 
table.i.month().name("month"), table.i.day().name("day"), - ] + ) result = ibis.to_sql(expr, dialect="impala") snapshot.assert_match(result, "out.sql") @@ -252,8 +252,8 @@ def test_correlated_predicate_subquery(table, snapshot): t1 = t0.view() # both are valid constructions - expr1 = t0[t0.g == t1.g] - expr2 = t1[t0.g == t1.g] + expr1 = t0.filter(t0.g == t1.g) + expr2 = t1.filter(t0.g == t1.g) snapshot.assert_match(translate(expr1), "out1.sql") snapshot.assert_match(translate(expr2), "out2.sql") diff --git a/ibis/backends/impala/tests/test_window.py b/ibis/backends/impala/tests/test_window.py index aeac63b38ee6..0d9356e282a6 100644 --- a/ibis/backends/impala/tests/test_window.py +++ b/ibis/backends/impala/tests/test_window.py @@ -22,7 +22,7 @@ def assert_sql_equal(expr, snapshot, out="out.sql"): def test_aggregate_in_projection(alltypes, snapshot): t = alltypes - proj = t[t, (t.f / t.f.sum()).name("normed_f")] + proj = t.select(t, (t.f / t.f.sum()).name("normed_f")) assert_sql_equal(proj, snapshot) @@ -93,7 +93,7 @@ def test_nested_analytic_function(alltypes, snapshot): def test_rank_functions(alltypes, snapshot): t = alltypes - proj = t[t.g, t.f.rank().name("minr"), t.f.dense_rank().name("denser")] + proj = t.select(t.g, t.f.rank().name("minr"), t.f.dense_rank().name("denser")) assert_sql_equal(proj, snapshot) @@ -113,7 +113,7 @@ def test_order_by_desc(alltypes, snapshot): w = window(order_by=ibis.desc(t.f)) - proj = t[t.f, ibis.row_number().over(w).name("revrank")] + proj = t.select(t.f, ibis.row_number().over(w).name("revrank")) assert_sql_equal(proj, snapshot, "out1.sql") expr = t.group_by("g").order_by(ibis.desc(t.f))[t.d.lag().name("foo"), t.a.max()] diff --git a/ibis/backends/mssql/tests/test_client.py b/ibis/backends/mssql/tests/test_client.py index 24a8a2fb1a4d..95b12c2972b7 100644 --- a/ibis/backends/mssql/tests/test_client.py +++ b/ibis/backends/mssql/tests/test_client.py @@ -159,7 +159,7 @@ def count_big(x, where: bool = True) -> int: expr = count_big(ft.id) expr = count_big(ft.id, where=ft.id == 1) - assert expr.execute() == ft[ft.id == 1].count().execute() + assert expr.execute() == ft.filter(ft.id == 1).count().execute() @pytest.mark.parametrize("string", ["a", " ", "a ", " a", ""]) diff --git a/ibis/backends/pandas/tests/test_arrays.py b/ibis/backends/pandas/tests/test_arrays.py index 98d1bb6fcd8d..9b657eb9cf3c 100644 --- a/ibis/backends/pandas/tests/test_arrays.py +++ b/ibis/backends/pandas/tests/test_arrays.py @@ -74,7 +74,7 @@ def test_array_collect_grouped(t, df): def test_array_collect_rolling_partitioned(t, df): window = ibis.trailing_window(1, order_by=t.plain_int64) colexpr = t.plain_float64.collect().over(window) - expr = t["dup_strings", "plain_int64", colexpr.name("collected")] + expr = t.select("dup_strings", "plain_int64", colexpr.name("collected")) result = expr.execute() expected = pd.DataFrame( { @@ -134,7 +134,7 @@ def test_array_slice_scalar(client, start, stop): @pytest.mark.parametrize("index", [1, 3, 4, 11, -11]) def test_array_index(t, df, index): - expr = t[t.array_of_float64[index].name("indexed")] + expr = t.select(t.array_of_float64[index].name("indexed")) result = expr.execute() expected = pd.DataFrame( { diff --git a/ibis/backends/pandas/tests/test_join.py b/ibis/backends/pandas/tests/test_join.py index c4f730e84ea0..4d44efd1c63a 100644 --- a/ibis/backends/pandas/tests/test_join.py +++ b/ibis/backends/pandas/tests/test_join.py @@ -17,16 +17,16 @@ @mutating_join_type def test_join(how, left, right, df1, df2): - expr = left.join(right, left.key == 
right.key, how=how)[ + expr = left.join(right, left.key == right.key, how=how).select( left, right.other_value, right.key3 - ] + ) result = expr.execute() expected = pd.merge(df1, df2, how=how, on="key") tm.assert_frame_equal(result[expected.columns], expected) def test_cross_join(left, right, df1, df2): - expr = left.cross_join(right)[left, right.other_value, right.key3] + expr = left.cross_join(right).select(left, right.other_value, right.key3) result = expr.execute() expected = pd.merge( df1.assign(dummy=1), df2.assign(dummy=1), how="inner", on="dummy" @@ -37,14 +37,14 @@ def test_cross_join(left, right, df1, df2): @mutating_join_type def test_join_project_left_table(how, left, right, df1, df2): - expr = left.join(right, left.key == right.key, how=how)[left, right.key3] + expr = left.join(right, left.key == right.key, how=how).select(left, right.key3) result = expr.execute() expected = pd.merge(df1, df2, how=how, on="key")[list(left.columns) + ["key3"]] tm.assert_frame_equal(result[expected.columns], expected) def test_cross_join_project_left_table(left, right, df1, df2): - expr = left.cross_join(right)[left, right.key3] + expr = left.cross_join(right).select(left, right.key3) result = expr.execute() expected = pd.merge( df1.assign(dummy=1), df2.assign(dummy=1), how="inner", on="dummy" @@ -67,9 +67,9 @@ def test_cross_join_project_left_table(left, right, df1, df2): ], ) def test_join_with_multiple_predicates(how, left, right, df1, df2): - expr = left.join(right, [left.key == right.key, left.key2 == right.key3], how=how)[ - left, right.key3, right.other_value - ] + expr = left.join( + right, [left.key == right.key, left.key2 == right.key3], how=how + ).select(left, right.key3, right.other_value) result = expr.execute() expected = pd.merge( df1, @@ -110,7 +110,9 @@ def test_join_with_multiple_predicates(how, left, right, df1, df2): ) def test_join_with_multiple_predicates_written_as_one(how, left, right, df1, df2): predicate = (left.key == right.key) & (left.key2 == right.key3) - expr = left.join(right, predicate, how=how)[left, right.key3, right.other_value] + expr = left.join(right, predicate, how=how).select( + left, right.key3, right.other_value + ) result = expr.execute() expected = pd.merge( df1, df2, how=how, left_on=["key", "key2"], right_on=["key", "key3"] @@ -155,7 +157,9 @@ def test_join_with_duplicate_non_key_columns_not_selected(how, left, right, df1, left = left.mutate(x=left.value * 2) right = right.mutate(x=right.other_value * 3) right = right[["key", "other_value"]] - expr = left.join(right, left.key == right.key, how=how)[left, right.other_value] + expr = left.join(right, left.key == right.key, how=how).select( + left, right.other_value + ) result = expr.execute() expected = pd.merge( df1.assign(x=df1.value * 2), @@ -169,7 +173,7 @@ def test_join_with_duplicate_non_key_columns_not_selected(how, left, right, df1, @mutating_join_type def test_join_with_post_expression_selection(how, left, right, df1, df2): join = left.join(right, left.key == right.key, how=how) - expr = join[left.key, left.value, right.other_value] + expr = join.select(left.key, left.value, right.other_value) result = expr.execute() expected = pd.merge(df1, df2, on="key", how=how)[["key", "value", "other_value"]] tm.assert_frame_equal(result[expected.columns], expected) @@ -181,8 +185,8 @@ def test_join_with_post_expression_filter(how, left): rhs = left[["key2", "value"]] joined = lhs.join(rhs, "key2", how=how) - projected = joined[lhs, rhs.value] - expr = projected[projected.value == 4] + projected = 
joined.select(lhs, rhs.value) + expr = projected.filter(projected.value == 4) result = expr.execute() df1 = lhs.execute() @@ -200,12 +204,12 @@ def test_multi_join_with_post_expression_filter(how, left, df1): rhs2 = left[["key2", "value"]].rename(value2="value") joined = lhs.join(rhs, "key2", how=how) - projected = joined[lhs, rhs.value] - filtered = projected[projected.value == 4] + projected = joined.select(lhs, rhs.value) + filtered = projected.filter(projected.value == 4) joined2 = filtered.join(rhs2, "key2") - projected2 = joined2[filtered.key, rhs2.value2] - expr = projected2[projected2.value2 == 3] + projected2 = joined2.select(filtered.key, rhs2.value2) + expr = projected2.filter(projected2.value2 == 3) result = expr.execute() @@ -224,7 +228,7 @@ def test_multi_join_with_post_expression_filter(how, left, df1): def test_join_with_non_trivial_key(how, left, right, df1, df2): # also test that the order of operands in the predicate doesn't matter join = left.join(right, right.key.length() == left.key.length(), how=how) - expr = join[left.key, left.value, right.other_value] + expr = join.select(left.key, left.value, right.other_value) result = expr.execute() expected = ( @@ -244,8 +248,8 @@ def test_join_with_non_trivial_key(how, left, right, df1, df2): def test_join_with_non_trivial_key_project_table(how, left, right, df1, df2): # also test that the order of operands in the predicate doesn't matter join = left.join(right, right.key.length() == left.key.length(), how=how) - expr = join[left, right.other_value] - expr = expr[expr.key.length() == 1] + expr = join.select(left, right.other_value) + expr = expr.filter(expr.key.length() == 1) result = expr.execute() expected = ( @@ -267,7 +271,7 @@ def test_join_with_project_right_duplicate_column(client, how, left, df1, df3): # also test that the order of operands in the predicate doesn't matter right = client.table("df3") join = left.join(right, ["key"], how=how) - expr = join[left.key, right.key2, right.other_value] + expr = join.select(left.key, right.key2, right.other_value) result = expr.execute() expected = ( @@ -283,7 +287,7 @@ def test_join_with_window_function(players_base, players_df, batting, batting_df # this should be semi_join tbl = batting.left_join(players, ["playerID"]) - t = tbl[batting.G, batting.playerID, batting.teamID] + t = tbl.select(batting.G, batting.playerID, batting.teamID) expr = t.group_by(t.teamID).mutate( team_avg=lambda d: d.G.mean(), demeaned_by_player=lambda d: d.G - d.G.mean(), diff --git a/ibis/backends/pandas/tests/test_operations.py b/ibis/backends/pandas/tests/test_operations.py index b116995c22bd..6e56472a9264 100644 --- a/ibis/backends/pandas/tests/test_operations.py +++ b/ibis/backends/pandas/tests/test_operations.py @@ -28,7 +28,9 @@ def test_literal(client): def test_selection(t, df): - expr = t[((t.plain_strings == "a") | (t.plain_int64 == 3)) & (t.dup_strings == "d")] + expr = t.filter( + ((t.plain_strings == "a") | (t.plain_int64 == 3)) & (t.dup_strings == "d") + ) result = expr.execute() expected = df[ ((df.plain_strings == "a") | (df.plain_int64 == 3)) & (df.dup_strings == "d") @@ -45,12 +47,10 @@ def test_mutate(t, df): def test_project_scope_does_not_override(t, df): col = t.plain_int64 - expr = t[ - [ - col.name("new_col"), - col.sum().over(ibis.window(group_by="dup_strings")).name("grouped"), - ] - ] + expr = t.select( + col.name("new_col"), + col.sum().over(ibis.window(group_by="dup_strings")).name("grouped"), + ) result = expr.execute() expected = pd.concat( [ diff --git 
a/ibis/backends/pandas/tests/test_window.py b/ibis/backends/pandas/tests/test_window.py index d588120b8fd4..a0cf0f4e3eed 100644 --- a/ibis/backends/pandas/tests/test_window.py +++ b/ibis/backends/pandas/tests/test_window.py @@ -172,7 +172,7 @@ def test_players(players, players_df): def test_batting_filter_mean(batting, batting_df): - expr = batting[batting.G > batting.G.mean()] + expr = batting.filter(batting.G > batting.G.mean()) result = expr.execute() expected = batting_df[batting_df.G > batting_df.G.mean()].reset_index(drop=True) tm.assert_frame_equal(result[expected.columns], expected) @@ -361,7 +361,7 @@ def test_mutate_with_window_after_join(sort_kind): left, right = map(con.table, ("left", "right")) joined = left.outer_join(right, left.ints == right.group) - proj = joined[left, right.value] + proj = joined.select(left, right.value) expr = proj.group_by("ints").mutate(sum=proj.value.sum()) result = expr.execute() expected = pd.DataFrame( @@ -390,7 +390,7 @@ def test_mutate_scalar_with_window_after_join(): left, right = map(con.table, ("left", "right")) joined = left.outer_join(right, left.ints == right.group) - proj = joined[left, right.value] + proj = joined.select(left, right.value) expr = proj.mutate(sum=proj.value.sum(), const=ibis.literal(1)) result = expr.execute() expected = pd.DataFrame( @@ -416,8 +416,8 @@ def test_project_scalar_after_join(): left, right = map(con.table, ("left", "right")) joined = left.outer_join(right, left.ints == right.group) - proj = joined[left, right.value] - expr = proj[proj.value.sum().name("sum"), ibis.literal(1).name("const")] + proj = joined.select(left, right.value) + expr = proj.select(proj.value.sum().name("sum"), ibis.literal(1).name("const")) result = expr.execute() expected = pd.DataFrame( { diff --git a/ibis/backends/postgres/tests/test_functions.py b/ibis/backends/postgres/tests/test_functions.py index e0c148057ca2..0c277fa634b5 100644 --- a/ibis/backends/postgres/tests/test_functions.py +++ b/ibis/backends/postgres/tests/test_functions.py @@ -647,7 +647,7 @@ def test_not_exists(alltypes, df): t = alltypes t2 = t.view() - expr = t[~((t.string_col == t2.string_col).any())] + expr = t.filter(~((t.string_col == t2.string_col).any())) result = expr.execute() left, right = df, t2.execute() @@ -855,7 +855,7 @@ def test_window_with_arithmetic(alltypes, df): def test_anonymous_aggregate(alltypes, df): t = alltypes - expr = t[t.double_col > t.double_col.mean()] + expr = t.filter(t.double_col > t.double_col.mean()) result = expr.execute() expected = df[df.double_col > df.double_col.mean()].reset_index(drop=True) tm.assert_frame_equal(result, expected) @@ -908,7 +908,7 @@ def test_array_collect(array_types): @pytest.mark.parametrize("index", [0, 1, 3, 4, 11, -1, -3, -4, -11]) def test_array_index(array_types, index): - expr = array_types[array_types.y[index].name("indexed")] + expr = array_types.select(array_types.y[index].name("indexed")) result = expr.execute() expected = pd.DataFrame( { diff --git a/ibis/backends/postgres/tests/test_geospatial.py b/ibis/backends/postgres/tests/test_geospatial.py index 9821f9d3ec26..574cb1e6e6d8 100644 --- a/ibis/backends/postgres/tests/test_geospatial.py +++ b/ibis/backends/postgres/tests/test_geospatial.py @@ -232,7 +232,7 @@ def test_get_point(geotable, expr_fn, expected): # boundaries with the contains predicate. Work around this by adding a # small buffer. 
expr = geotable["geo_linestring"].buffer(0.01).contains(arg) - result = geotable[geotable, expr.name("tmp")].execute()["tmp"] + result = geotable.select(geotable, expr.name("tmp")).execute()["tmp"] testing.assert_almost_equal(result, expected, decimal=2) @@ -257,7 +257,7 @@ def test_area(con, geotable): ) def test_srid(geotable, condition, expected): """Testing for geo spatial srid operation.""" - expr = geotable[geotable.id, condition(geotable).name("tmp")] + expr = geotable.select(geotable.id, condition(geotable).name("tmp")) result = expr.execute()["tmp"][[0]] assert np.all(result == expected) @@ -275,7 +275,7 @@ def test_srid(geotable, condition, expected): ) def test_set_srid(geotable, condition, expected): """Testing for geo spatial set_srid operation.""" - expr = geotable[geotable.id, condition(geotable).name("tmp")] + expr = geotable.select(geotable.id, condition(geotable).name("tmp")) result = expr.execute()["tmp"][[0]] assert np.all(result == expected) @@ -305,7 +305,7 @@ def test_set_srid(geotable, condition, expected): ) def test_transform(geotable, condition, expected): """Testing for geo spatial transform operation.""" - expr = geotable[geotable.id, condition(geotable).name("tmp")] + expr = geotable.select(geotable.id, condition(geotable).name("tmp")) result = expr.execute()["tmp"][[0]] assert np.all(result == expected) @@ -325,7 +325,7 @@ def test_transform(geotable, condition, expected): def test_cast_geography(geotable, expr_fn): """Testing for geo spatial transform operation.""" p = expr_fn(geotable).cast("geography") - expr = geotable[geotable.id, p.distance(p).name("tmp")] + expr = geotable.select(geotable.id, p.distance(p).name("tmp")) result = expr.execute()["tmp"][[0]] # distance from a point to a same point should be 0 assert np.all(result == 0) @@ -346,7 +346,7 @@ def test_cast_geography(geotable, expr_fn): def test_cast_geometry(geotable, expr_fn): """Testing for geo spatial transform operation.""" p = expr_fn(geotable).cast("geometry") - expr = geotable[geotable.id, p.distance(p).name("tmp")] + expr = geotable.select(geotable.id, p.distance(p).name("tmp")) result = expr.execute()["tmp"][[0]] # distance from a point to a same point should be 0 assert np.all(result == 0) diff --git a/ibis/backends/postgres/tests/test_json.py b/ibis/backends/postgres/tests/test_json.py index 219562b81cb8..a3e11838a48c 100644 --- a/ibis/backends/postgres/tests/test_json.py +++ b/ibis/backends/postgres/tests/test_json.py @@ -23,7 +23,7 @@ def jsonb_t(con): @pytest.mark.parametrize("data", [param({"status": True}, id="status")]) def test_json(data, alltypes): lit = ibis.literal(json.dumps(data), type="json").name("tmp") - expr = alltypes[[alltypes.id, lit]].head(1) + expr = alltypes.select(alltypes.id, lit).head(1) df = expr.execute() assert df["tmp"].iloc[0] == data diff --git a/ibis/backends/postgres/tests/test_postgis.py b/ibis/backends/postgres/tests/test_postgis.py index c1c1b6484715..9a20356a2553 100644 --- a/ibis/backends/postgres/tests/test_postgis.py +++ b/ibis/backends/postgres/tests/test_postgis.py @@ -21,7 +21,7 @@ def test_load_geodata(con): def test_empty_select(geotable): - expr = geotable[geotable.geo_point.geo_equals(geotable.geo_linestring)] + expr = geotable.filter(geotable.geo_point.geo_equals(geotable.geo_linestring)) result = expr.execute() assert len(result) == 0 diff --git a/ibis/backends/postgres/tests/test_string.py b/ibis/backends/postgres/tests/test_string.py index d069c293f2f3..25237fb8a65d 100644 --- a/ibis/backends/postgres/tests/test_string.py +++ 
b/ibis/backends/postgres/tests/test_string.py @@ -15,6 +15,6 @@ @pytest.mark.usefixtures("con") def test_special_strings(alltypes, data, data_type): lit = ibis.literal(data, type=data_type).name("tmp") - expr = alltypes[[alltypes.id, lit]].head(1) + expr = alltypes.select(alltypes.id, lit).head(1) df = expr.execute() assert df["tmp"].iloc[0] == uuid.UUID(data) diff --git a/ibis/backends/postgres/tests/test_udf.py b/ibis/backends/postgres/tests/test_udf.py index 59a494a0cc5f..0c56392c04bf 100644 --- a/ibis/backends/postgres/tests/test_udf.py +++ b/ibis/backends/postgres/tests/test_udf.py @@ -85,7 +85,9 @@ def test_existing_sql_udf(con_for_udf, test_database, table): """Test creating ibis UDF object based on existing UDF in the database.""" # Create ibis UDF objects referring to UDFs already created in the database custom_length_udf = con_for_udf.function("custom_len", database=test_database) - result_obj = table[table, custom_length_udf(table["user_name"]).name("custom_len")] + result_obj = table.select( + table, custom_length_udf(table["user_name"]).name("custom_len") + ) result = result_obj.execute() assert result["custom_len"].sum() == result["name_length"].sum() @@ -93,7 +95,9 @@ def test_existing_sql_udf(con_for_udf, test_database, table): def test_existing_plpython_udf(con_for_udf, test_database, table): # Create ibis UDF objects referring to UDFs already created in the database py_length_udf = con_for_udf.function("pylen", database=test_database) - result_obj = table[table, py_length_udf(table["user_name"]).name("custom_len")] + result_obj = table.select( + table, py_length_udf(table["user_name"]).name("custom_len") + ) result = result_obj.execute() assert result["custom_len"].sum() == result["name_length"].sum() diff --git a/ibis/backends/pyspark/tests/test_array.py b/ibis/backends/pyspark/tests/test_array.py index 8d45e24e9358..b253b084cc70 100644 --- a/ibis/backends/pyspark/tests/test_array.py +++ b/ibis/backends/pyspark/tests/test_array.py @@ -82,7 +82,7 @@ def test_array_slice_scalar(con, start, stop): @pytest.mark.parametrize("index", [1, 3, 4, 11, -11]) def test_array_index(t, df, index): - expr = t[t.array_int[index].name("indexed")] + expr = t.select(t.array_int[index].name("indexed")) result = expr.execute() expected = pd.DataFrame( diff --git a/ibis/backends/pyspark/tests/test_ddl.py b/ibis/backends/pyspark/tests/test_ddl.py index 64720c06e8bf..975d2840ca0e 100644 --- a/ibis/backends/pyspark/tests/test_ddl.py +++ b/ibis/backends/pyspark/tests/test_ddl.py @@ -134,16 +134,16 @@ def test_insert_validate_types(con, alltypes, test_data_db, temp_table): database=db, ) - to_insert = expr[ + to_insert = expr.select( expr.tinyint_col, expr.smallint_col.name("int_col"), expr.string_col - ] + ) con.insert(temp_table, to_insert.limit(10)) - to_insert = expr[ + to_insert = expr.select( expr.tinyint_col, expr.smallint_col.cast("int32").name("int_col"), expr.string_col, - ] + ) con.insert(temp_table, to_insert.limit(10)) diff --git a/ibis/backends/pyspark/tests/test_null.py b/ibis/backends/pyspark/tests/test_null.py index 048330d6b39a..b4ee62dcb7e7 100644 --- a/ibis/backends/pyspark/tests/test_null.py +++ b/ibis/backends/pyspark/tests/test_null.py @@ -11,7 +11,7 @@ def test_isnull(con): table_pandas = table.execute() for col, _ in table_pandas.items(): - result = table[table[col].isnull()].execute().reset_index(drop=True) + result = table.filter(table[col].isnull()).execute().reset_index(drop=True) expected = table_pandas[table_pandas[col].isnull()].reset_index(drop=True) 
tm.assert_frame_equal(result, expected) @@ -21,6 +21,6 @@ def test_notnull(con): table_pandas = table.execute() for col, _ in table_pandas.items(): - result = table[table[col].notnull()].execute().reset_index(drop=True) + result = table.filter(table[col].notnull()).execute().reset_index(drop=True) expected = table_pandas[table_pandas[col].notnull()].reset_index(drop=True) tm.assert_frame_equal(result, expected) diff --git a/ibis/backends/risingwave/tests/test_functions.py b/ibis/backends/risingwave/tests/test_functions.py index 48550824022a..d26fc39f5390 100644 --- a/ibis/backends/risingwave/tests/test_functions.py +++ b/ibis/backends/risingwave/tests/test_functions.py @@ -448,7 +448,7 @@ def test_not_exists(alltypes, df): t = alltypes t2 = t.view() - expr = t[~((t.string_col == t2.string_col).any())] + expr = t.filter(~((t.string_col == t2.string_col).any())) result = expr.execute() left, right = df, t2.execute() @@ -615,7 +615,7 @@ def test_window_with_arithmetic(alltypes, df): def test_anonymous_aggregate(alltypes, df): t = alltypes - expr = t[t.double_col > t.double_col.mean()] + expr = t.filter(t.double_col > t.double_col.mean()) result = expr.execute() expected = df[df.double_col > df.double_col.mean()].reset_index(drop=True) tm.assert_frame_equal(result, expected) diff --git a/ibis/backends/risingwave/tests/test_json.py b/ibis/backends/risingwave/tests/test_json.py index 6f6d04b58f7a..92cb197672f6 100644 --- a/ibis/backends/risingwave/tests/test_json.py +++ b/ibis/backends/risingwave/tests/test_json.py @@ -13,6 +13,6 @@ @pytest.mark.parametrize("data", [param({"status": True}, id="status")]) def test_json(data, alltypes): lit = ibis.literal(json.dumps(data), type="json").name("tmp") - expr = alltypes[[alltypes.id, lit]].head(1) + expr = alltypes.select(alltypes.id, lit).head(1) df = expr.execute() assert df["tmp"].iloc[0] == data diff --git a/ibis/backends/tests/sql/conftest.py b/ibis/backends/tests/sql/conftest.py index 0a4f733971e4..a552cec35a4b 100644 --- a/ibis/backends/tests/sql/conftest.py +++ b/ibis/backends/tests/sql/conftest.py @@ -93,20 +93,20 @@ def t2(con): @pytest.fixture(scope="module") def not_exists(foo_t, bar_t): - return foo_t[-(foo_t.key1 == bar_t.key1).any()] + return foo_t.filter(~(foo_t.key1 == bar_t.key1).any()) @pytest.fixture(scope="module") def union(con): table = con.table("functional_alltypes") - t1 = table[table.int_col > 0][ + t1 = table.filter(table.int_col > 0).select( table.string_col.name("key"), table.float_col.cast("double").name("value"), - ] - t2 = table[table.int_col <= 0][ + ) + t2 = table.filter(table.int_col <= 0).select( table.string_col.name("key"), table.double_col.name("value") - ] + ) return t1.union(t2, distinct=True) @@ -115,13 +115,13 @@ def union(con): def union_all(con): table = con.table("functional_alltypes") - t1 = table[table.int_col > 0][ + t1 = table.filter(table.int_col > 0).select( table.string_col.name("key"), table.float_col.cast("double").name("value"), - ] - t2 = table[table.int_col <= 0][ + ) + t2 = table.filter(table.int_col <= 0).select( table.string_col.name("key"), table.double_col.name("value") - ] + ) return t1.union(t2, distinct=False) @@ -130,13 +130,13 @@ def union_all(con): def intersect(con): table = con.table("functional_alltypes") - t1 = table[table.int_col > 0][ + t1 = table.filter(table.int_col > 0).select( table.string_col.name("key"), table.float_col.cast("double").name("value"), - ] - t2 = table[table.int_col <= 0][ + ) + t2 = table.filter(table.int_col <= 0).select( table.string_col.name("key"), 
table.double_col.name("value") - ] + ) return t1.intersect(t2) @@ -145,13 +145,13 @@ def intersect(con): def difference(con): table = con.table("functional_alltypes") - t1 = table[table.int_col > 0][ + t1 = table.filter(table.int_col > 0).select( table.string_col.name("key"), table.float_col.cast("double").name("value"), - ] - t2 = table[table.int_col <= 0][ + ) + t2 = table.filter(table.int_col <= 0).select( table.string_col.name("key"), table.double_col.name("value") - ] + ) return t1.difference(t2) @@ -193,12 +193,12 @@ def projection_fuse_filter(): proj = t["a", "b", "c"] # Rewrite a little more aggressively here - expr1 = proj[t.a > 0] + expr1 = proj.filter(t.a > 0) # at one point these yielded different results - filtered = t[t.a > 0] + filtered = t.filter(t.a > 0) - expr2 = filtered[t.a, t.b, t.c] + expr2 = filtered.select(t.a, t.b, t.c) expr3 = filtered.select(["a", "b", "c"]) return expr1, expr2, expr3 diff --git a/ibis/backends/tests/sql/test_compiler.py b/ibis/backends/tests/sql/test_compiler.py index dc27463647b8..5ab96249b249 100644 --- a/ibis/backends/tests/sql/test_compiler.py +++ b/ibis/backends/tests/sql/test_compiler.py @@ -18,7 +18,7 @@ def test_union(union, snapshot): def test_union_project_column(union_all, snapshot): # select a column, get a subquery - expr = union_all[[union_all.key]] + expr = union_all.select(union_all.key) snapshot.assert_match(to_sql(expr), "out.sql") assert_decompile_roundtrip(expr, snapshot, eq=schemas_eq) @@ -35,14 +35,14 @@ def test_table_difference(difference, snapshot): def test_intersect_project_column(intersect, snapshot): # select a column, get a subquery - expr = intersect[[intersect.key]] + expr = intersect.select(intersect.key) snapshot.assert_match(to_sql(expr), "out.sql") assert_decompile_roundtrip(expr, snapshot, eq=schemas_eq) def test_difference_project_column(difference, snapshot): # select a column, get a subquery - expr = difference[[difference.key]] + expr = difference.select(difference.key) snapshot.assert_match(to_sql(expr), "out.sql") assert_decompile_roundtrip(expr, snapshot, eq=schemas_eq) @@ -50,14 +50,14 @@ def test_difference_project_column(difference, snapshot): def test_table_distinct(con, snapshot): t = con.table("functional_alltypes") - expr = t[t.string_col, t.int_col].distinct() + expr = t.select(t.string_col, t.int_col).distinct() snapshot.assert_match(to_sql(expr), "out.sql") assert_decompile_roundtrip(expr, snapshot) def test_column_distinct(con, snapshot): t = con.table("functional_alltypes") - expr = t[t.string_col].distinct() + expr = t.select(t.string_col).distinct() snapshot.assert_match(to_sql(expr), "out.sql") assert_decompile_roundtrip(expr, snapshot) @@ -66,7 +66,7 @@ def test_count_distinct(con, snapshot): t = con.table("functional_alltypes") metric = t.int_col.nunique().name("nunique") - expr = t[t.bigint_col > 0].group_by("string_col").aggregate([metric]) + expr = t.filter(t.bigint_col > 0).group_by("string_col").aggregate([metric]) snapshot.assert_match(to_sql(expr), "out.sql") assert_decompile_roundtrip(expr, snapshot) @@ -96,8 +96,8 @@ def test_pushdown_with_or(snapshot): ], "functional_alltypes", ) - subset = t[(t.double_col > 3.14) & t.string_col.contains("foo")] - expr = subset[(subset.int_col - 1 == 0) | (subset.float_col <= 1.34)] + subset = t.filter((t.double_col > 3.14) & t.string_col.contains("foo")) + expr = subset.filter((subset.int_col - 1 == 0) | (subset.float_col <= 1.34)) snapshot.assert_match(to_sql(expr), "out.sql") @@ -117,7 +117,7 @@ def test_having_size(snapshot): def 
test_having_from_filter(snapshot): t = ibis.table([("a", "int64"), ("b", "string")], "t") - filt = t[t.b == "m"] + filt = t.filter(t.b == "m") gb = filt.group_by(filt.b) having = gb.having(filt.a.max() == 2) expr = having.aggregate(filt.a.sum().name("sum")) @@ -128,16 +128,16 @@ def test_having_from_filter(snapshot): def test_simple_agg_filter(snapshot): t = ibis.table([("a", "int64"), ("b", "string")], name="my_table") - filt = t[t.a < 100] - expr = filt[filt.a == filt.a.max()] + filt = t.filter(t.a < 100) + expr = filt.filter(filt.a == filt.a.max()) snapshot.assert_match(to_sql(expr), "out.sql") def test_agg_and_non_agg_filter(snapshot): t = ibis.table([("a", "int64"), ("b", "string")], name="my_table") - filt = t[t.a < 100] - expr = filt[filt.a == filt.a.max()] - expr = expr[expr.b == "a"] + filt = t.filter(t.a < 100) + expr = filt.filter(filt.a == filt.a.max()) + expr = expr.filter(expr.b == "a") snapshot.assert_match(to_sql(expr), "out.sql") @@ -145,8 +145,8 @@ def test_agg_filter(snapshot): t = ibis.table([("a", "int64"), ("b", "int64")], name="my_table") t = t.mutate(b2=t.b * 2) t = t[["a", "b2"]] - filt = t[t.a < 100] - expr = filt[filt.a == filt.a.max().name("blah")] + filt = t.filter(t.a < 100) + expr = filt.filter(filt.a == filt.a.max().name("blah")) snapshot.assert_match(to_sql(expr), "out.sql") @@ -154,8 +154,8 @@ def test_agg_filter_with_alias(snapshot): t = ibis.table([("a", "int64"), ("b", "int64")], name="my_table") t = t.mutate(b2=t.b * 2) t = t[["a", "b2"]] - filt = t[t.a < 100] - expr = filt[filt.a.name("A") == filt.a.max().name("blah")] + filt = t.filter(t.a < 100) + expr = filt.filter(filt.a.name("A") == filt.a.max().name("blah")) snapshot.assert_match(to_sql(expr), "out.sql") @@ -169,7 +169,7 @@ def test_table_drop_with_filter(snapshot): right = ibis.table([("b", "string")], name="s") joined = left.join(right, left.b == right.b) - joined = joined[left.a] + joined = joined.select(left.a) expr = joined.filter(joined.a < 1.0) snapshot.assert_match(to_sql(expr), "out.sql") assert_decompile_roundtrip(expr, snapshot, eq=schemas_eq) @@ -198,9 +198,8 @@ def test_subquery_where_location(snapshot): ) param = ibis.param("timestamp") expr = ( - t[["float_col", "timestamp_col", "int_col", "string_col"]][ - lambda t: t.timestamp_col < param - ] + t.select("float_col", "timestamp_col", "int_col", "string_col") + .filter(lambda t: t.timestamp_col < param) .group_by("string_col") .aggregate(foo=lambda t: t.float_col.sum()) .foo.count() diff --git a/ibis/backends/tests/sql/test_select_sql.py b/ibis/backends/tests/sql/test_select_sql.py index d41eaca70bf3..5f4b63df8e7b 100644 --- a/ibis/backends/tests/sql/test_select_sql.py +++ b/ibis/backends/tests/sql/test_select_sql.py @@ -29,9 +29,12 @@ param(lambda star1, **_: star1.order_by("f"), id="single_column"), param(lambda star1, **_: star1.limit(10), id="limit_simple"), param(lambda star1, **_: star1.limit(10, offset=5), id="limit_with_offset"), - param(lambda star1, **_: star1[star1.f > 0].limit(10), id="filter_then_limit"), param( - lambda star1, **_: star1.limit(10)[lambda x: x.f > 0], + lambda star1, **_: star1.filter(star1.f > 0).limit(10), + id="filter_then_limit", + ), + param( + lambda star1, **_: star1.limit(10).filter(lambda x: x.f > 0), id="limit_then_filter", ), param(lambda star1, **_: star1.count(), id="aggregate_table_count_metric"), @@ -60,16 +63,16 @@ def test_simple_joins(star1, star2, snapshot): pred = t1["foo_id"] == t2["foo_id"] pred2 = t1["bar_id"] == t2["foo_id"] - expr = t1.inner_join(t2, [pred])[[t1]] + expr = 
t1.inner_join(t2, [pred]).select(t1) snapshot.assert_match(to_sql(expr), "inner.sql") - expr = t1.left_join(t2, [pred])[[t1]] + expr = t1.left_join(t2, [pred]).select(t1) snapshot.assert_match(to_sql(expr), "left.sql") - expr = t1.outer_join(t2, [pred])[[t1]] + expr = t1.outer_join(t2, [pred]).select(t1) snapshot.assert_match(to_sql(expr), "outer.sql") - expr = t1.inner_join(t2, [pred, pred2])[[t1]] + expr = t1.inner_join(t2, [pred, pred2]).select(t1) snapshot.assert_match(to_sql(expr), "inner_two_preds.sql") assert_decompile_roundtrip(expr, snapshot) @@ -103,8 +106,8 @@ def test_join_between_joins(snapshot): "third", ) t4 = ibis.table([("key3", "string"), ("value4", "double")], "fourth") - left = t1.inner_join(t2, [("key1", "key1")])[t1, t2.value2] - right = t3.inner_join(t4, [("key3", "key3")])[t3, t4.value4] + left = t1.inner_join(t2, [("key1", "key1")]).select(t1, t2.value2) + right = t3.inner_join(t4, [("key3", "key3")]).select(t3, t4.value4) joined = left.inner_join(right, [("key2", "key2")]) @@ -130,13 +133,13 @@ def test_join_just_materialized(nation, region, customer, snapshot): def test_semi_join(star1, star2, snapshot): - expr = star1.semi_join(star2, [star1.foo_id == star2.foo_id])[[star1]] + expr = star1.semi_join(star2, [star1.foo_id == star2.foo_id]).select(star1) snapshot.assert_match(to_sql(expr), "out.sql") assert_decompile_roundtrip(expr, snapshot) def test_anti_join(star1, star2, snapshot): - expr = star1.anti_join(star2, [star1.foo_id == star2.foo_id])[[star1]] + expr = star1.anti_join(star2, [star1.foo_id == star2.foo_id]).select(star1) snapshot.assert_match(to_sql(expr), "out.sql") assert_decompile_roundtrip(expr, snapshot) @@ -145,11 +148,11 @@ def test_where_no_pushdown_possible(star1, star2, snapshot): t1 = star1 t2 = star2 - joined = t1.inner_join(t2, [t1.foo_id == t2.foo_id])[ + joined = t1.inner_join(t2, [t1.foo_id == t2.foo_id]).select( t1, (t1.f - t2.value1).name("diff") - ] + ) - expr = joined[joined.diff > 1] + expr = joined.filter(joined.diff > 1) snapshot.assert_match(to_sql(expr), "out.sql") assert_decompile_roundtrip(expr, snapshot) @@ -186,7 +189,7 @@ def test_bug_duplicated_where(airlines, snapshot): dest_avg=t.arrdelay.mean(), dev=t.arrdelay - t.arrdelay.mean() ) - tmp1 = expr[expr.dev.notnull()] + tmp1 = expr.filter(expr.dev.notnull()) tmp2 = tmp1.order_by(ibis.desc("dev")) expr = tmp2.limit(10) snapshot.assert_match(to_sql(expr), "out.sql") @@ -229,8 +232,8 @@ def test_fuse_projections(snapshot): f1 = (table["foo"] + table["bar"]).name("baz") pred = table["value"] > 0 - table2 = table[table, f1] - table2_filtered = table2[pred] + table2 = table.select(table, f1) + table2_filtered = table2.filter(pred) f2 = (table2["foo"] * 2).name("qux") @@ -321,7 +324,7 @@ def test_bug_project_multiple_times(customer, nation, region, snapshot): nation, [customer.c_nationkey == nation.n_nationkey] ).inner_join(region, [nation.n_regionkey == region.r_regionkey]) proj1 = [customer, nation.n_name, region.r_name] - step1 = joined[proj1] + step1 = joined.select(proj1) topk_by = step1.c_acctbal.cast("double").sum() @@ -335,19 +338,19 @@ def test_bug_project_multiple_times(customer, nation, region, snapshot): def test_aggregate_projection_subquery(alltypes, snapshot): t = alltypes - proj = t[t.f > 0][t, (t.a + t.b).name("foo")] + proj = t.filter(t.f > 0).select(t, (t.a + t.b).name("foo")) def agg(x): return x.aggregate([x.foo.sum().name("foo total")], by=["g"]) # predicate gets pushed down - filtered = proj[proj.g == "bar"] + filtered = proj.filter(proj.g == "bar") # 
Pushdown is not possible (in Impala, Postgres, others) snapshot.assert_match(to_sql(proj), "proj.sql") snapshot.assert_match(to_sql(filtered), "filtered.sql") snapshot.assert_match(to_sql(agg(filtered)), "agg_filtered.sql") - snapshot.assert_match(to_sql(agg(proj[proj.foo < 10])), "agg_filtered2.sql") + snapshot.assert_match(to_sql(agg(proj.filter(proj.foo < 10))), "agg_filtered2.sql") def test_double_nested_subquery_no_aliases(snapshot): @@ -373,7 +376,7 @@ def test_aggregate_projection_alias_bug(star1, star2, snapshot): t1 = star1 t2 = star2 - what = t1.inner_join(t2, [t1.foo_id == t2.foo_id])[[t1, t2.value1]] + what = t1.inner_join(t2, [t1.foo_id == t2.foo_id]).select(t1, t2.value1) # TODO: Not fusing the aggregation with the projection yet expr = what.aggregate([what.value1.sum().name("total")], by=[what.foo_id]) @@ -386,7 +389,7 @@ def test_subquery_in_union(alltypes, snapshot): expr1 = t.group_by(["a", "g"]).aggregate(t.f.sum().name("metric")) expr2 = expr1.view() - join1 = expr1.join(expr2, expr1.g == expr2.g)[[expr1]] + join1 = expr1.join(expr2, expr1.g == expr2.g).select(expr1) join2 = join1.view() expr = join1.union(join2) @@ -405,11 +408,11 @@ def test_limit_with_self_join(functional_alltypes, snapshot): def test_topk_predicate_pushdown_bug(nation, customer, region, snapshot): # Observed on TPCH data - cplusgeo = customer.inner_join( - nation, [customer.c_nationkey == nation.n_nationkey] - ).inner_join(region, [nation.n_regionkey == region.r_regionkey])[ - customer, nation.n_name, region.r_name - ] + cplusgeo = ( + customer.inner_join(nation, [customer.c_nationkey == nation.n_nationkey]) + .inner_join(region, [nation.n_regionkey == region.r_regionkey]) + .select(customer, nation.n_name, region.r_name) + ) expr = cplusgeo.semi_join( cplusgeo.n_name.topk(10, by=cplusgeo.c_acctbal.sum()), "n_name" @@ -425,7 +428,7 @@ def test_topk_analysis_bug(snapshot): ) dests = ("ORD", "JFK", "SFO") - t = airlines[airlines.dest.isin(dests)] + t = airlines.filter(airlines.dest.isin(dests)) expr = ( t.semi_join(t.dest.topk(10, by=t.arrdelay.mean()), "dest") .group_by("origin") @@ -451,7 +454,7 @@ def test_bool_bool(snapshot): ) x = ibis.literal(True) - expr = t[(t.dest.cast("int64") == 0) == x] + expr = t.filter((t.dest.cast("int64") == 0) == x) snapshot.assert_match(to_sql(expr), "out.sql") assert_decompile_roundtrip(expr, snapshot) @@ -460,7 +463,7 @@ def test_case_in_projection(alltypes, snapshot): t = alltypes expr = t.g.case().when("foo", "bar").when("baz", "qux").else_("default").end() expr2 = ibis.case().when(t.g == "foo", "bar").when(t.g == "baz", t.g).end() - expr = t[expr.name("col1"), expr2.name("col2"), t] + expr = t.select(expr.name("col1"), expr2.name("col2"), t) snapshot.assert_match(to_sql(expr), "out.sql") assert_decompile_roundtrip(expr, snapshot, eq=schemas_eq) @@ -469,12 +472,12 @@ def test_case_in_projection(alltypes, snapshot): def test_identifier_quoting(snapshot): data = ibis.table([("date", "int32"), ("explain", "string")], "table") - expr = data[data.date.name("else"), data.explain.name("join")] + expr = data.select(data.date.name("else"), data.explain.name("join")) snapshot.assert_match(to_sql(expr), "out.sql") def test_scalar_subquery_different_table(foo, bar, snapshot): - expr = foo[foo.y > bar.x.max()] + expr = foo.filter(foo.y > bar.x.max()) snapshot.assert_match(to_sql(expr), "out.sql") @@ -482,7 +485,7 @@ def test_exists_subquery(t1, t2, snapshot): # GH #660 cond = t1.key1 == t2.key1 - expr = t1[cond.any()] + expr = t1.filter(cond.any()) 
snapshot.assert_match(to_sql(expr), "out.sql") assert repr(expr) @@ -509,8 +512,8 @@ def test_filter_inside_exists(snapshot): "purchases", ) filt = purchases.ts > "2015-08-15" - cond = (events.user_id == purchases[filt].user_id).any() - expr = events[cond] + cond = (events.user_id == purchases.filter(filt).user_id).any() + expr = events.filter(cond) snapshot.assert_match(to_sql(expr), "out.sql") @@ -532,7 +535,7 @@ def test_order_by_on_limit_yield_subquery(functional_alltypes, snapshot): def test_join_with_limited_table(star1, star2, snapshot): limited = star1.limit(100) - expr = limited.inner_join(star2, [limited.foo_id == star2.foo_id])[[limited]] + expr = limited.inner_join(star2, [limited.foo_id == star2.foo_id]).select(limited) snapshot.assert_match(to_sql(expr), "out.sql") @@ -571,7 +574,7 @@ def test_join_filtered_tables_no_pushdown(snapshot): tbl_b_filter = tbl_b.filter([tbl_b.year == 2016, tbl_b.month == 2, tbl_b.day == 29]) joined = tbl_a_filter.left_join(tbl_b_filter, ["year", "month", "day"]) - result = joined[tbl_a_filter.value_a, tbl_b_filter.value_b] + result = joined.select(tbl_a_filter.value_a, tbl_b_filter.value_b) snapshot.assert_match(to_sql(result), "out.sql") @@ -580,14 +583,14 @@ def test_loj_subquery_filter_handling(snapshot): # #781 left = ibis.table([("id", "int32"), ("desc", "string")], "foo") right = ibis.table([("id", "int32"), ("desc", "string")], "bar") - left = left[left.id < 2] - right = right[right.id < 3] + left = left.filter(left.id < 2) + right = right.filter(right.id < 3) joined = left.left_join(right, ["id", "desc"]) - expr = joined[ + expr = joined.select( [left[name].name("left_" + name) for name in left.columns] + [right[name].name("right_" + name) for name in right.columns] - ] + ) snapshot.assert_match(to_sql(expr), "out.sql") @@ -622,16 +625,16 @@ def test_filter_predicates(snapshot): def test_join_projection_subquery_bug(nation, region, customer, snapshot): # From an observed bug, derived from tpch tables - geo = nation.inner_join(region, [("n_regionkey", "r_regionkey")])[ + geo = nation.inner_join(region, [("n_regionkey", "r_regionkey")]).select( nation.n_nationkey, nation.n_name.name("nation"), region.r_name.name("region"), - ] + ) - expr = geo.inner_join(customer, [("n_nationkey", "c_nationkey")])[ + expr = geo.inner_join(customer, [("n_nationkey", "c_nationkey")]).select( customer, geo, - ] + ) snapshot.assert_match(to_sql(expr), "out.sql") @@ -695,15 +698,16 @@ def test_subquery_factor_correlated_subquery(con, snapshot): tpch = ( region.join(nation, region.r_regionkey == nation.n_regionkey) .join(customer, customer.c_nationkey == nation.n_nationkey) - .join(orders, orders.o_custkey == customer.c_custkey)[fields_of_interest] + .join(orders, orders.o_custkey == customer.c_custkey) + .select(fields_of_interest) ) # Self-reference + correlated subquery complicates things t2 = tpch.view() - conditional_avg = t2[t2.region == tpch.region].amount.mean() + conditional_avg = t2.filter(t2.region == tpch.region).amount.mean() amount_filter = tpch.amount > conditional_avg - expr = tpch[amount_filter].limit(10) + expr = tpch.filter(amount_filter).limit(10) snapshot.assert_match(to_sql(expr), "out.sql") @@ -711,13 +715,17 @@ def test_self_join_subquery_distinct_equal(con, snapshot): region = con.table("tpch_region") nation = con.table("tpch_nation") - j1 = region.join(nation, region.r_regionkey == nation.n_regionkey)[region, nation] - - j2 = region.join(nation, region.r_regionkey == nation.n_regionkey)[ + j1 = region.join(nation, region.r_regionkey == 
nation.n_regionkey).select( region, nation - ].view() + ) - expr = j1.join(j2, j1.r_regionkey == j2.r_regionkey)[j1.r_name, j2.n_name] + j2 = ( + region.join(nation, region.r_regionkey == nation.n_regionkey) + .select(region, nation) + .view() + ) + + expr = j1.join(j2, j1.r_regionkey == j2.r_regionkey).select(j1.r_name, j2.n_name) snapshot.assert_match(to_sql(expr), "out.sql") @@ -739,7 +747,8 @@ def test_tpch_self_join_failure(con, snapshot): joined_all = ( region.join(nation, region.r_regionkey == nation.n_regionkey) .join(customer, customer.c_nationkey == nation.n_nationkey) - .join(orders, orders.o_custkey == customer.c_custkey)[fields_of_interest] + .join(orders, orders.o_custkey == customer.c_custkey) + .select(fields_of_interest) ) year = joined_all.odate.year().name("year") @@ -750,9 +759,9 @@ def test_tpch_self_join_failure(con, snapshot): prior = annual_amounts.view() yoy_change = (current.total - prior.total).name("yoy_change") - yoy = current.join(prior, current.year == (prior.year - 1))[ + yoy = current.join(prior, current.year == (prior.year - 1)).select( current.region, current.year, yoy_change - ] + ) snapshot.assert_match(to_sql(yoy), "out.sql") # Compiler.to_sql(yoy) # fail @@ -762,13 +771,13 @@ def test_subquery_in_filter_predicate(star1, snapshot): t1 = star1 pred = t1.f > t1.f.mean() - expr = t1[pred] + expr = t1.filter(pred) snapshot.assert_match(to_sql(expr), "expr.sql") # This brought out another expression rewriting bug, since the filtered # table isn't found elsewhere in the expression. - pred2 = t1.f > t1[t1.foo_id == "foo"].f.mean() - expr2 = t1[pred2] + pred2 = t1.f > t1.filter(t1.foo_id == "foo").f.mean() + expr2 = t1.filter(pred2) snapshot.assert_match(to_sql(expr2), "expr2.sql") @@ -776,11 +785,11 @@ def test_filter_subquery_derived_reduction(star1, snapshot): t1 = star1 # Reduction can be nested inside some scalar expression - pred3 = t1.f > t1[t1.foo_id == "foo"].f.mean().log() - pred4 = t1.f > (t1[t1.foo_id == "foo"].f.mean().log() + 1) + pred3 = t1.f > t1.filter(t1.foo_id == "foo").f.mean().log() + pred4 = t1.f > (t1.filter(t1.foo_id == "foo").f.mean().log() + 1) - expr3 = t1[pred3] - expr4 = t1[pred4] + expr3 = t1.filter(pred3) + expr4 = t1.filter(pred4) snapshot.assert_match(to_sql(expr3), "expr3.sql") snapshot.assert_match(to_sql(expr4), "expr4.sql") @@ -838,11 +847,11 @@ def test_filter_self_join_analysis_bug(snapshot): metric = purchases.amount.sum().name("total") agged = purchases.group_by(["region", "kind"]).aggregate(metric) - left = agged[agged.kind == "foo"] - right = agged[agged.kind == "bar"] + left = agged.filter(agged.kind == "foo") + right = agged.filter(agged.kind == "bar") joined = left.join(right, left.region == right.region) - result = joined[left.region, (left.total - right.total).name("diff")] + result = joined.select(left.region, (left.total - right.total).name("diff")) snapshot.assert_match(to_sql(result), "result.sql") @@ -907,8 +916,8 @@ def test_chain_limit_doesnt_collapse(snapshot): def test_join_with_conditional_aggregate(snapshot): left = ibis.table({"on": "int", "by": "string"}, name="left") right = ibis.table({"on": "int", "by": "string", "val": "float"}, name="right") - stat = right[(right.by == left.by) & (right.on <= left.on)]["on"].max() - merged = left.join(right, how="left", predicates=left.by == right.by)[ + stat = right.filter(right.by == left.by, right.on <= left.on)["on"].max() + merged = left.join(right, how="left", predicates=left.by == right.by).filter( right.on == stat - ] + ) 
snapshot.assert_match(to_sql(merged), "result.sql") diff --git a/ibis/backends/tests/sql/test_sql.py b/ibis/backends/tests/sql/test_sql.py index f979b864d916..81f2b57e6a8f 100644 --- a/ibis/backends/tests/sql/test_sql.py +++ b/ibis/backends/tests/sql/test_sql.py @@ -186,7 +186,7 @@ def test_coalesce(functional_alltypes, snapshot): def test_named_expr(functional_alltypes, snapshot): - expr = functional_alltypes[(functional_alltypes.double_col * 2).name("foo")] + expr = functional_alltypes.select((functional_alltypes.double_col * 2).name("foo")) snapshot.assert_match(to_sql(expr), "out.sql") @@ -283,12 +283,12 @@ def test_limit(star1, expr_fn, snapshot): def test_limit_filter(star1, snapshot): - expr = star1[star1.f > 0].limit(10) + expr = star1.filter(star1.f > 0).limit(10) snapshot.assert_match(to_sql(expr), "out.sql") def test_limit_subquery(star1, snapshot): - expr = star1.limit(10)[lambda x: x.f > 0] + expr = star1.limit(10).filter(lambda x: x.f > 0) snapshot.assert_match(to_sql(expr), "out.sql") @@ -299,7 +299,7 @@ def test_cte_factor_distinct_but_equal(alltypes, snapshot): expr1 = t.group_by("g").aggregate(t.f.sum().name("metric")) expr2 = tt.group_by("g").aggregate(tt.f.sum().name("metric")).view() - expr = expr1.join(expr2, expr1.g == expr2.g)[[expr1]] + expr = expr1.join(expr2, expr1.g == expr2.g).select(expr1) snapshot.assert_match(to_sql(expr), "out.sql") @@ -307,7 +307,7 @@ def test_cte_factor_distinct_but_equal(alltypes, snapshot): def test_self_reference_join(star1, snapshot): t1 = star1 t2 = t1.view() - expr = t1.inner_join(t2, [t1.foo_id == t2.bar_id])[[t1]] + expr = t1.inner_join(t2, [t1.foo_id == t2.bar_id]).select(t1) snapshot.assert_match(to_sql(expr), "out.sql") @@ -318,15 +318,15 @@ def test_self_reference_in_not_exists(functional_alltypes, snapshot): cond = (t.string_col == t2.string_col).any() - semi = t[cond] - anti = t[-cond] + semi = t.filter(cond) + anti = t.filter(-cond) snapshot.assert_match(to_sql(semi), "semi.sql") snapshot.assert_match(to_sql(anti), "anti.sql") def test_where_uncorrelated_subquery(foo, bar, snapshot): - expr = foo[foo.job.isin(bar.job)] + expr = foo.filter(foo.job.isin(bar.job)) snapshot.assert_match(to_sql(expr), "out.sql") @@ -335,8 +335,8 @@ def test_where_correlated_subquery(foo, snapshot): t1 = foo t2 = t1.view() - stat = t2[t1.dept_id == t2.dept_id].y.mean() - expr = t1[t1.y > stat] + stat = t2.filter(t1.dept_id == t2.dept_id).y.mean() + expr = t1.filter(t1.y > stat) snapshot.assert_match(to_sql(expr), "out.sql") @@ -346,7 +346,7 @@ def test_subquery_aliased(star1, star2, snapshot): t2 = star2 agged = t1.aggregate([t1.f.sum().name("total")], by=["foo_id"]) - expr = agged.inner_join(t2, [agged.foo_id == t2.foo_id])[agged, t2.value1] + expr = agged.inner_join(t2, [agged.foo_id == t2.foo_id]).select(agged, t2.value1) snapshot.assert_match(to_sql(expr), "out.sql") @@ -356,9 +356,9 @@ def test_lower_projection_sort_key(star1, star2, snapshot): t2 = star2 agged = t1.aggregate([t1.f.sum().name("total")], by=["foo_id"]) - expr = agged.inner_join(t2, [agged.foo_id == t2.foo_id])[agged, t2.value1] + expr = agged.inner_join(t2, [agged.foo_id == t2.foo_id]).select(agged, t2.value1) - expr2 = expr[expr.total > 100].order_by(ibis.desc("total")) + expr2 = expr.filter(expr.total > 100).order_by(ibis.desc("total")) snapshot.assert_match(to_sql(expr2), "out.sql") assert_decompile_roundtrip(expr2, snapshot) @@ -367,12 +367,12 @@ def test_exists(foo_t, bar_t, snapshot): t1 = foo_t t2 = bar_t cond = (t1.key1 == t2.key1).any() - e1 = t1[cond] + e1 = 
t1.filter(cond) snapshot.assert_match(to_sql(e1), "e1.sql") cond2 = ((t1.key1 == t2.key1) & (t2.key2 == "foo")).any() - e2 = t1[cond2] + e2 = t1.filter(cond2) snapshot.assert_match(to_sql(e2), "e2.sql") @@ -389,7 +389,8 @@ def test_not_exists(not_exists, snapshot): lambda t: t["string_col", "int_col"].distinct(), id="projection_distinct" ), param( - lambda t: t[t.string_col].distinct(), id="single_column_projection_distinct" + lambda t: t.select(t.string_col).distinct(), + id="single_column_projection_distinct", ), param(lambda t: t.int_col.nunique().name("nunique"), id="count_distinct"), param( @@ -432,15 +433,12 @@ def test_where_correlated_subquery_with_join(snapshot): supplier = ibis.table([("s_suppkey", "int64")], name="supplier") q = part.join(partsupp, part.p_partkey == partsupp.ps_partkey) - q = q[ - part.p_partkey, - partsupp.ps_supplycost, - ] + q = q.select(part.p_partkey, partsupp.ps_supplycost) subq = partsupp.join(supplier, supplier.s_suppkey == partsupp.ps_suppkey) subq = subq.select(partsupp.ps_partkey, partsupp.ps_supplycost) - subq = subq[subq.ps_partkey == q.p_partkey] + subq = subq.filter(subq.ps_partkey == q.p_partkey) - expr = q[q.ps_supplycost == subq.ps_supplycost.min()] + expr = q.filter(q.ps_supplycost == subq.ps_supplycost.min()) snapshot.assert_match(to_sql(expr), "out.sql") @@ -451,7 +449,7 @@ def test_mutate_filter_join_no_cross_join(snapshot): name="person", ) mutated = person.mutate(age=ibis.literal(400)) - expr = mutated.filter(mutated.age <= 40)[mutated.person_id] + expr = mutated.filter(mutated.age <= 40).select(mutated.person_id) snapshot.assert_match(to_sql(expr), "out.sql") @@ -525,7 +523,7 @@ def test_gh_1045(test1, test2, test3, snapshot): t3 = t3.mutate(t3_val2=t3.id3) t4 = t3.join(t2, t2.id2b == t3.id3) - t1 = t1[[t1[c].name(f"t1_{c}") for c in t1.columns]] + t1 = t1.select([t1[c].name(f"t1_{c}") for c in t1.columns]) expr = t1.left_join(t4, t1.t1_id1 == t4.id2a) @@ -575,7 +573,7 @@ def test_no_cart_join(snapshot): def test_order_by_expr(snapshot): t = ibis.table(dict(a="int", b="string"), name="t") - expr = t[lambda t: t.a == 1].order_by(lambda t: t.b + "a") + expr = t.filter(lambda t: t.a == 1).order_by(lambda t: t.b + "a") snapshot.assert_match(to_sql(expr), "out.sql") @@ -639,7 +637,8 @@ def test_no_cartesian_join(snapshot): final = ( customers.left_join(customer_orders, "customer_id") .drop("customer_id_right") - .left_join(customer_payments, "customer_id")[ + .left_join(customer_payments, "customer_id") + .select( customers.customer_id, customers.first_name, customers.last_name, @@ -647,7 +646,7 @@ def test_no_cartesian_join(snapshot): customer_orders.most_recent_order, customer_orders.number_of_orders, customer_payments.total_amount.name("customer_lifetime_value"), - ] + ) ) snapshot.assert_match(ibis.to_sql(final, dialect="duckdb"), "out.sql") diff --git a/ibis/backends/tests/test_aggregation.py b/ibis/backends/tests/test_aggregation.py index a44d95cb08ac..19e1207ee405 100644 --- a/ibis/backends/tests/test_aggregation.py +++ b/ibis/backends/tests/test_aggregation.py @@ -1596,7 +1596,7 @@ def test_agg_sort(alltypes): def test_filter(backend, alltypes, df): expr = ( - alltypes[_.string_col == "1"] + alltypes.filter(_.string_col == "1") .mutate(x=L(1, "int64")) .group_by(_.x) .aggregate(sum=_.double_col.sum()) diff --git a/ibis/backends/tests/test_client.py b/ibis/backends/tests/test_client.py index 1f8bfc55db87..c38a26f0241e 100644 --- a/ibis/backends/tests/test_client.py +++ b/ibis/backends/tests/test_client.py @@ -218,7 +218,7 @@ def 
test_load_data(backend, con, temp_table, lamduh): [ param(lambda t: t.string_col, [("string_col", dt.String)], id="column"), param( - lambda t: t[t.string_col, t.bigint_col], + lambda t: t.select(t.string_col, t.bigint_col), [("string_col", dt.String), ("bigint_col", dt.Int64)], id="table", ), diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py index 8dbcefaacdc0..23f5d83fdd92 100644 --- a/ibis/backends/tests/test_generic.py +++ b/ibis/backends/tests/test_generic.py @@ -174,7 +174,7 @@ def test_isna(backend, alltypes, col, value, filt): table = alltypes.select(**{col: value}) df = table.execute() - result = table[filt(table[col])].execute().reset_index(drop=True) + result = table.filter(filt(table[col])).execute().reset_index(drop=True) expected = df[df[col].isna()].reset_index(drop=True) backend.assert_frame_equal(result, expected) @@ -255,7 +255,7 @@ def test_identical_to(backend, alltypes, sorted_df): dt = df[["tinyint_col", "double_col"]] ident = sorted_alltypes.tinyint_col.identical_to(sorted_alltypes.double_col) - expr = sorted_alltypes["id", ident.name("tmp")].order_by("id") + expr = sorted_alltypes.select("id", ident.name("tmp")).order_by("id") result = expr.execute().tmp expected = (dt.tinyint_col.isnull() & dt.double_col.isnull()) | ( @@ -280,9 +280,9 @@ def test_identical_to(backend, alltypes, sorted_df): @pytest.mark.notimpl(["druid"]) def test_isin(backend, alltypes, sorted_df, column, elements): sorted_alltypes = alltypes.order_by("id") - expr = sorted_alltypes[ + expr = sorted_alltypes.select( "id", sorted_alltypes[column].isin(elements).name("tmp") - ].order_by("id") + ).order_by("id") result = expr.execute().tmp expected = sorted_df[column].isin(elements) @@ -304,9 +304,9 @@ def test_isin(backend, alltypes, sorted_df, column, elements): @pytest.mark.notimpl(["druid"]) def test_notin(backend, alltypes, sorted_df, column, elements): sorted_alltypes = alltypes.order_by("id") - expr = sorted_alltypes[ + expr = sorted_alltypes.select( "id", sorted_alltypes[column].notin(elements).name("tmp") - ].order_by("id") + ).order_by("id") result = expr.execute().tmp expected = ~sorted_df[column].isin(elements) @@ -339,7 +339,7 @@ def test_notin(backend, alltypes, sorted_df, column, elements): @pytest.mark.notimpl(["druid"]) def test_filter(backend, alltypes, sorted_df, predicate_fn, expected_fn): sorted_alltypes = alltypes.order_by("id") - table = sorted_alltypes[predicate_fn(sorted_alltypes)].order_by("id") + table = sorted_alltypes.filter(predicate_fn(sorted_alltypes)).order_by("id") result = table.execute() expected = sorted_df[expected_fn(sorted_df)] @@ -427,8 +427,8 @@ def test_select_filter_mutate(backend, alltypes, df): ) # Actual test - t = t[t.columns] - t = t[~t["float_col"].isnan()] + t = t.select(t.columns) + t = t.filter(~t["float_col"].isnan()) t = t.mutate(float_col=t["float_col"].cast("float64")) result = t.execute() @@ -956,7 +956,7 @@ def test_table_describe_large(con): ], ) def test_isin_notin(backend, alltypes, df, ibis_op, pandas_op): - expr = alltypes[ibis_op] + expr = alltypes.filter(ibis_op) expected = df.loc[pandas_op(df)].sort_values(["id"]).reset_index(drop=True) result = expr.execute().sort_values(["id"]).reset_index(drop=True) backend.assert_frame_equal(result, expected) @@ -990,7 +990,7 @@ def test_isin_notin(backend, alltypes, df, ibis_op, pandas_op): ], ) def test_isin_notin_column_expr(backend, alltypes, df, ibis_op, pandas_op): - expr = alltypes[ibis_op].order_by("id") + expr = alltypes.filter(ibis_op).order_by("id") 
expected = df[pandas_op(df)].sort_values(["id"]).reset_index(drop=True) result = expr.execute() backend.assert_frame_equal(result, expected) @@ -1078,11 +1078,6 @@ def test_interactive(alltypes, monkeypatch): repr(expr) -def test_correlated_subquery(alltypes): - expr = alltypes[_.double_col > _.view().double_col] - assert expr.compile() is not None - - @pytest.mark.notimpl(["polars", "pyspark"]) @pytest.mark.notimpl( ["risingwave"], @@ -1090,8 +1085,8 @@ def test_correlated_subquery(alltypes): reason='DataFrame.iloc[:, 0] (column name="playerID") are different', ) def test_uncorrelated_subquery(backend, batting, batting_df): - subset_batting = batting[batting.yearID <= 2000] - expr = batting[_.yearID == subset_batting.yearID.max()]["playerID", "yearID"] + subset_batting = batting.filter(batting.yearID <= 2000) + expr = batting.filter(_.yearID == subset_batting.yearID.max())["playerID", "yearID"] result = expr.execute() expected = batting_df[batting_df.yearID == 2000][["playerID", "yearID"]] @@ -1124,10 +1119,10 @@ def test_int_scalar(alltypes): def test_exists(batting, awards_players, method_name): years = [1980, 1981] batting_years = [1871, *years] - batting = batting[batting.yearID.isin(batting_years)] - awards_players = awards_players[awards_players.yearID.isin(years)] + batting = batting.filter(batting.yearID.isin(batting_years)) + awards_players = awards_players.filter(awards_players.yearID.isin(years)) method = methodcaller(method_name) - expr = batting[method(batting.yearID == awards_players.yearID)] + expr = batting.filter(method(batting.yearID == awards_players.yearID)) result = expr.execute() assert not result.empty diff --git a/ibis/backends/tests/test_interactive.py b/ibis/backends/tests/test_interactive.py index 276377a249d9..bd19507f4a7b 100644 --- a/ibis/backends/tests/test_interactive.py +++ b/ibis/backends/tests/test_interactive.py @@ -86,6 +86,6 @@ def test_interactive_non_compilable_repr_does_not_fail(table): def test_isin_rule_suppressed_exception_repr_not_fail(table): bool_clause = table["string_col"].notin(["1", "4", "7"]) - expr = table[bool_clause]["string_col"].value_counts() + expr = table.filter(bool_clause)["string_col"].value_counts() repr(expr) diff --git a/ibis/backends/tests/test_join.py b/ibis/backends/tests/test_join.py index f10ecb782eb1..6e8d09e01469 100644 --- a/ibis/backends/tests/test_join.py +++ b/ibis/backends/tests/test_join.py @@ -64,8 +64,8 @@ def check_eq(left, right, how, **kwargs): ) @pytest.mark.notimpl(["druid"]) def test_mutating_join(backend, batting, awards_players, how): - left = batting[batting.yearID == 2015] - right = awards_players[awards_players.lgID == "NL"].drop("yearID", "lgID") + left = batting.filter(batting.yearID == 2015) + right = awards_players.filter(awards_players.lgID == "NL").drop("yearID", "lgID") left_df = left.execute() right_df = right.execute() @@ -114,8 +114,8 @@ def test_mutating_join(backend, batting, awards_players, how): @pytest.mark.notimpl(["dask", "druid"]) @pytest.mark.notyet(["flink"], reason="Flink doesn't support semi joins or anti joins") def test_filtering_join(backend, batting, awards_players, how): - left = batting[batting.yearID == 2015] - right = awards_players[awards_players.lgID == "NL"].drop("yearID", "lgID") + left = batting.filter(batting.yearID == 2015) + right = awards_players.filter(awards_players.lgID == "NL").drop("yearID", "lgID") left_df = left.execute() right_df = right.execute() @@ -142,10 +142,10 @@ def test_filtering_join(backend, batting, awards_players, how): def 
test_join_then_filter_no_column_overlap(awards_players, batting): - left = batting[batting.yearID == 2015] + left = batting.filter(batting.yearID == 2015) year = left.yearID.name("year") - left = left[year, "RBI"] - right = awards_players[awards_players.lgID == "NL"] + left = left.select(year, "RBI") + right = awards_players.filter(awards_players.lgID == "NL") expr = left.join(right, left.year == right.yearID) filters = [expr.RBI == 9] @@ -196,8 +196,8 @@ def test_semi_join_topk(con, batting, awards_players, func): reason="postgres can't handle null types columns", ) def test_join_with_pandas(batting, awards_players): - batting_filt = batting[lambda t: t.yearID < 1900] - awards_players_filt = awards_players[lambda t: t.yearID < 1900].execute() + batting_filt = batting.filter(lambda t: t.yearID < 1900) + awards_players_filt = awards_players.filter(lambda t: t.yearID < 1900).execute() assert isinstance(awards_players_filt, pd.DataFrame) expr = batting_filt.join(awards_players_filt, "yearID") df = expr.execute() @@ -205,10 +205,10 @@ def test_join_with_pandas(batting, awards_players): def test_join_with_pandas_non_null_typed_columns(batting, awards_players): - batting_filt = batting[lambda t: t.yearID < 1900][["yearID"]] - awards_players_filt = awards_players[lambda t: t.yearID < 1900][ - ["yearID"] - ].execute() + batting_filt = batting.filter(lambda t: t.yearID < 1900).select("yearID") + awards_players_filt = ( + awards_players.filter(lambda t: t.yearID < 1900).select("yearID").execute() + ) # ensure that none of the columns of either table have type null batting_schema = batting_filt.schema() diff --git a/ibis/backends/tests/test_sql.py b/ibis/backends/tests/test_sql.py index 72ca7b12a4d6..2085e2750290 100644 --- a/ibis/backends/tests/test_sql.py +++ b/ibis/backends/tests/test_sql.py @@ -88,7 +88,7 @@ def test_cte_refs_in_topo_order(backend, snapshot): @pytest.mark.never(["pandas", "dask", "polars"], reason="not SQL", raises=ValueError) def test_isin_bug(con, snapshot): t = ibis.table(dict(x="int"), name="t") - good = t[t.x > 2].x + good = t.filter(t.x > 2).x expr = t.x.isin(good) snapshot.assert_match(str(ibis.to_sql(expr, dialect=con.name)), "out.sql") diff --git a/ibis/backends/tests/test_struct.py b/ibis/backends/tests/test_struct.py index 8757175f6b60..b00fff2ae047 100644 --- a/ibis/backends/tests/test_struct.py +++ b/ibis/backends/tests/test_struct.py @@ -120,7 +120,7 @@ def test_collect_into_struct(alltypes): t = alltypes expr = ( - t[_.string_col.isin(("0", "1"))] + t.filter(_.string_col.isin(("0", "1"))) .group_by(group="string_col") .agg( val=lambda t: ibis.struct( diff --git a/ibis/backends/tests/test_temporal.py b/ibis/backends/tests/test_temporal.py index b1a907a50e4d..ee08a0083835 100644 --- a/ibis/backends/tests/test_temporal.py +++ b/ibis/backends/tests/test_temporal.py @@ -1037,7 +1037,7 @@ def test_interval_add_cast_scalar(backend, alltypes): def test_interval_add_cast_column(backend, alltypes, df): timestamp_date = alltypes.timestamp_col.date() delta = alltypes.bigint_col.cast("interval('D')") - expr = alltypes["id", (timestamp_date + delta).name("tmp")] + expr = alltypes.select("id", (timestamp_date + delta).name("tmp")) result = expr.execute().sort_values("id").reset_index().tmp df = df.sort_values("id").reset_index(drop=True) expected = ( @@ -1702,7 +1702,7 @@ def test_interval_literal(con, backend): def test_date_column_from_ymd(backend, con, alltypes, df): c = alltypes.timestamp_col expr = ibis.date(c.year(), c.month(), c.day()) - tbl = 
alltypes[expr.name("timestamp_col")] + tbl = alltypes.select(expr.name("timestamp_col")) result = con.execute(tbl) golden = df.timestamp_col.dt.date.astype(result.timestamp_col.dtype) @@ -1719,7 +1719,7 @@ def test_timestamp_column_from_ymdhms(backend, con, alltypes, df): expr = ibis.timestamp( c.year(), c.month(), c.day(), c.hour(), c.minute(), c.second() ) - tbl = alltypes[expr.name("timestamp_col")] + tbl = alltypes.select(expr.name("timestamp_col")) result = con.execute(tbl) golden = df.timestamp_col.dt.floor("s").astype(result.timestamp_col.dtype) diff --git a/ibis/backends/tests/test_vectorized_udf.py b/ibis/backends/tests/test_vectorized_udf.py index 5c80df040c53..be0693134892 100644 --- a/ibis/backends/tests/test_vectorized_udf.py +++ b/ibis/backends/tests/test_vectorized_udf.py @@ -339,7 +339,7 @@ def test_reduction_udf_array_return_type(udf_backend, udf_alltypes, udf_df): def test_reduction_udf_on_empty_data(udf_backend, udf_alltypes): """Test that summarization can handle empty data.""" # First filter down to zero rows - t = udf_alltypes[udf_alltypes["int_col"] > np.inf] + t = udf_alltypes.filter(udf_alltypes["int_col"] > np.inf) result = t.group_by("year").aggregate(mean=calc_mean(t["int_col"])).execute() expected = pd.DataFrame({"year": [], "mean": []}) # We check that the result is an empty DataFrame, diff --git a/ibis/backends/tests/test_window.py b/ibis/backends/tests/test_window.py index a2664eef4707..7d21fb9f7fb1 100644 --- a/ibis/backends/tests/test_window.py +++ b/ibis/backends/tests/test_window.py @@ -610,7 +610,7 @@ def test_grouped_unbounded_window( def test_simple_ungrouped_unbound_following_window( backend, alltypes, ibis_method, pandas_fn ): - t = alltypes[alltypes.double_col < 50].order_by("id") + t = alltypes.filter(alltypes.double_col < 50).order_by("id") df = t.execute() w = ibis.window(rows=(0, None), order_by=t.id) @@ -635,7 +635,7 @@ def test_simple_ungrouped_unbound_following_window( reason="Feature is not yet implemented: Window function with empty PARTITION BY is not supported yet", ) def test_simple_ungrouped_window_with_scalar_order_by(alltypes): - t = alltypes[alltypes.double_col < 50].order_by("id") + t = alltypes.filter(alltypes.double_col < 50).order_by("id") w = ibis.window(rows=(0, None), order_by=ibis.null()) expr = t.double_col.sum().over(w).name("double_col") # hard to reproduce this in pandas, so just test that it actually executes diff --git a/ibis/backends/tests/tpc/ds/test_queries.py b/ibis/backends/tests/tpc/ds/test_queries.py index 4520ee9544b3..540503cd42e3 100644 --- a/ibis/backends/tests/tpc/ds/test_queries.py +++ b/ibis/backends/tests/tpc/ds/test_queries.py @@ -3480,11 +3480,12 @@ def agg_sales_net_by_month(sales, ns, sales_expr, net_expr): ) .join(date_dim, sales[f"{ns}_sold_date_sk"] == date_dim.d_date_sk) .join(time_dim, sales[f"{ns}_sold_time_sk"] == time_dim.t_time_sk) - .join(ship_mode, sales[f"{ns}_ship_mode_sk"] == ship_mode.sm_ship_mode_sk)[ - (_.d_year == 2001) - & (_.t_time.between(30838, 30838 + 28800)) - & (_.sm_carrier.isin(["DHL", "BARIAN"])) - ] + .join(ship_mode, sales[f"{ns}_ship_mode_sk"] == ship_mode.sm_ship_mode_sk) + .filter( + (_.d_year == 2001), + (_.t_time.between(30838, 30838 + 28800)), + (_.sm_carrier.isin(["DHL", "BARIAN"])), + ) .group_by( "w_warehouse_name", "w_warehouse_sq_ft", diff --git a/ibis/backends/tests/tpc/h/test_queries.py b/ibis/backends/tests/tpc/h/test_queries.py index f51817a31a76..57c1384d9338 100644 --- a/ibis/backends/tests/tpc/h/test_queries.py +++ 
b/ibis/backends/tests/tpc/h/test_queries.py @@ -66,9 +66,9 @@ def test_02(part, supplier, partsupp, nation, region): .join(region, nation.n_regionkey == region.r_regionkey) ) - subexpr = subexpr[ + subexpr = subexpr.filter( (subexpr.r_name == REGION) & (expr.p_partkey == subexpr.ps_partkey) - ] + ) filters = [ expr.p_size == SIZE, @@ -210,7 +210,7 @@ def test_07(supplier, lineitem, orders, customer, nation): q = q.join(n1, supplier.s_nationkey == n1.n_nationkey) q = q.join(n2, customer.c_nationkey == n2.n_nationkey) - q = q[ + q = q.select( n1.n_name.name("supp_nation"), n2.n_name.name("cust_nation"), lineitem.l_shipdate, @@ -218,7 +218,7 @@ def test_07(supplier, lineitem, orders, customer, nation): lineitem.l_discount, lineitem.l_shipdate.year().name("l_year"), (lineitem.l_extendedprice * (1 - lineitem.l_discount)).name("volume"), - ] + ) q = q.filter( [ @@ -255,14 +255,14 @@ def test_08(part, supplier, region, lineitem, orders, customer, nation): q = q.join(region, n1.n_regionkey == region.r_regionkey) q = q.join(n2, supplier.s_nationkey == n2.n_nationkey) - q = q[ + q = q.select( orders.o_orderdate.year().name("o_year"), (lineitem.l_extendedprice * (1 - lineitem.l_discount)).name("volume"), n2.n_name.name("nation"), region.r_name, orders.o_orderdate, part.p_type, - ] + ) q = q.filter( [ @@ -297,14 +297,14 @@ def test_09(part, supplier, lineitem, partsupp, orders, nation): q = q.join(orders, orders.o_orderkey == lineitem.l_orderkey) q = q.join(nation, supplier.s_nationkey == nation.n_nationkey) - q = q[ + q = q.select( (q.l_extendedprice * (1 - q.l_discount) - q.ps_supplycost * q.l_quantity).name( "amount" ), q.o_orderdate.year().name("o_year"), q.n_name.name("nation"), q.p_name, - ] + ) q = q.filter([q.p_name.like("%" + COLOR + "%")]) @@ -494,7 +494,7 @@ def test_15(lineitem, supplier): q = supplier.join(qrev, supplier.s_suppkey == qrev.l_suppkey) q = q.filter([q.total_revenue == qrev.total_revenue.max()]) - q = q[q.s_suppkey, q.s_name, q.s_address, q.s_phone, q.total_revenue] + q = q.select(q.s_suppkey, q.s_name, q.s_address, q.s_phone, q.total_revenue) return q.order_by([q.s_suppkey]) @@ -679,7 +679,7 @@ def test_20(supplier, nation, partsupp, part, lineitem): q1 = q1.filter([q1.n_name == NATION, q1.s_suppkey.isin(q2.ps_suppkey)]) - q1 = q1[q1.s_name, q1.s_address] + q1 = q1.select(q1.s_name, q1.s_address) return q1.order_by(q1.s_name) @@ -704,7 +704,7 @@ def test_21(supplier, lineitem, orders, nation): q = q.join(lineitem, supplier.s_suppkey == lineitem.l_suppkey) q = q.join(orders, orders.o_orderkey == lineitem.l_orderkey) q = q.join(nation, supplier.s_nationkey == nation.n_nationkey) - q = q[ + q = q.select( q.l_orderkey.name("l1_orderkey"), q.o_orderstatus, q.l_receiptdate, @@ -712,7 +712,7 @@ def test_21(supplier, lineitem, orders, nation): q.l_suppkey.name("l1_suppkey"), q.s_name, q.n_name, - ] + ) q = q.filter( [ q.o_orderstatus == "F", @@ -764,9 +764,9 @@ def test_22(customer, orders): ~(orders.o_custkey == customer.c_custkey).any(), ] ) - custsale = custsale[ + custsale = custsale.select( customer.c_phone.substr(0, 2).name("cntrycode"), customer.c_acctbal - ] + ) gq = custsale.group_by(custsale.cntrycode) outerq = gq.aggregate(numcust=custsale.count(), totacctbal=custsale.c_acctbal.sum()) diff --git a/ibis/expr/tests/test_format.py b/ibis/expr/tests/test_format.py index 806eed536931..6fe8b3cddf94 100644 --- a/ibis/expr/tests/test_format.py +++ b/ibis/expr/tests/test_format.py @@ -89,7 +89,7 @@ def test_format_multiple_join_with_projection(snapshot): table3 = 
ibis.table([("bar_id", "string"), ("value2", "double")], "three") - filtered = table[table["f"] > 0] + filtered = table.filter(table["f"] > 0) pred1 = filtered["foo_id"] == table2["foo_id"] pred2 = filtered["bar_id"] == table3["bar_id"] @@ -98,7 +98,7 @@ def test_format_multiple_join_with_projection(snapshot): j2 = j1.inner_join(table3, [pred2]) # Project out the desired fields - view = j2[[filtered, table2["value1"], table3["value2"]]] + view = j2.select(filtered, table2["value1"], table3["value2"]) # it works! result = repr(view) @@ -112,7 +112,7 @@ def test_memoize_filtered_table(snapshot): ) dests = ["ORD", "JFK", "SFO"] - t = airlines[airlines.dest.isin(dests)] + t = airlines.filter(airlines.dest.isin(dests)) delay_filter = t.dest.topk(10, by=t.arrdelay.mean()) result = repr(delay_filter) @@ -149,11 +149,11 @@ def test_memoize_filtered_tables_in_join(snapshot): metric = purchases.amount.sum().name("total") agged = purchases.group_by(["region", "kind"]).aggregate(metric) - left = agged[agged.kind == "foo"] - right = agged[agged.kind == "bar"] + left = agged.filter(agged.kind == "foo") + right = agged.filter(agged.kind == "bar") cond = left.region == right.region - joined = left.join(right, cond)[left, right.total.name("right_total")] + joined = left.join(right, cond).select(left, right.total.name("right_total")) result = repr(joined) snapshot.assert_match(result, "repr.txt") @@ -179,7 +179,7 @@ def test_scalar_parameter_formatting(): def test_same_column_multiple_aliases(snapshot): table = ibis.table([("col", "int64")], name="t") - expr = table[table.col.name("fakealias1"), table.col.name("fakealias2")] + expr = table.select(table.col.name("fakealias1"), table.col.name("fakealias2")) result = repr(expr) assert "UnboundTable: t" in result @@ -412,7 +412,7 @@ def values(self): return {} table = MyRelation(alltypes, kind="foo").to_expr() - expr = table[table, table.a.name("a2")] + expr = table.select(table, table.a.name("a2")) result = repr(expr) snapshot.assert_match(result, "repr.txt") @@ -441,7 +441,7 @@ def shape(self): def test_format_show_variables(monkeypatch, alltypes, snapshot): monkeypatch.setattr(ibis.options.repr, "show_variables", True) - filtered = alltypes[alltypes.f > 0] + filtered = alltypes.filter(alltypes.f > 0) ordered = filtered.order_by("f") projected = ordered[["a", "b", "f"]] diff --git a/ibis/expr/tests/test_newrels.py b/ibis/expr/tests/test_newrels.py index 34f76ae7763e..f996c51f5dca 100644 --- a/ibis/expr/tests/test_newrels.py +++ b/ibis/expr/tests/test_newrels.py @@ -769,7 +769,7 @@ def test_join_predicate_dereferencing(): table2 = ibis.table({"foo_id": str, "value1": float, "value3": float}) table3 = ibis.table({"bar_id": str, "value2": float}) - filtered = table[table["f"] > 0] + filtered = table.filter(table["f"] > 0) # dereference table.foo_id to filtered.foo_id j1 = filtered.left_join(table2, table["foo_id"] == table2["foo_id"]) @@ -793,7 +793,7 @@ def test_join_predicate_dereferencing(): j1 = filtered.left_join(table2, table["foo_id"] == table2["foo_id"]) j2 = j1.inner_join(table3, filtered["bar_id"] == table3["bar_id"]) - view = j2[[filtered, table2["value1"], table3["value2"]]] + view = j2.select(filtered, table2["value1"], table3["value2"]) with join_tables(j2) as (r1, r2, r3): expected = ops.JoinChain( first=r1, @@ -1148,7 +1148,7 @@ def test_self_join_view(): def test_self_join_with_view_projection(): t1 = ibis.table(schema={"x": "int", "y": "int", "z": "str"}) t2 = t1.view() - expr = t1.inner_join(t2, ["x"])[[t1]] + expr = t1.inner_join(t2, 
["x"]).select(t1) with join_tables(expr) as (r1, r2): expected = ops.JoinChain( @@ -1200,7 +1200,7 @@ def test_join_chain_gets_reused_and_continued_after_a_select(): c = ibis.table(name="c", schema={"e": "int64", "f": "string"}) ab = a.join(b, [a.a == b.c]) - abc = ab[a.b, b.d].join(c, [a.a == c.e]) + abc = ab.select(a.b, b.d).join(c, [a.a == c.e]) with join_tables(abc) as (r1, r2, r3): expected = ops.JoinChain( @@ -1442,8 +1442,8 @@ def test_join_between_joins(): ) t4 = ibis.table([("key3", "string"), ("value4", "double")], "fourth") - left = t1.inner_join(t2, [("key1", "key1")])[t1, t2.value2] - right = t3.inner_join(t4, [("key3", "key3")])[t3, t4.value4] + left = t1.inner_join(t2, [("key1", "key1")]).select(t1, t2.value2) + right = t3.inner_join(t4, [("key3", "key3")]).select(t3, t4.value4) joined = left.inner_join(right, left.key2 == right.key2) @@ -1535,7 +1535,7 @@ def test_join_with_compound_predicate(): (t1.a + t1.a != t2.b) & (t1.b + t1.b != t2.a), ], ) - expr = joined[t1] + expr = joined.select(t1) with join_tables(joined) as (r1, r2): expected = ops.JoinChain( first=r1, diff --git a/ibis/expr/tests/test_visualize.py b/ibis/expr/tests/test_visualize.py index a77b65628051..2f5df98af296 100644 --- a/ibis/expr/tests/test_visualize.py +++ b/ibis/expr/tests/test_visualize.py @@ -30,8 +30,8 @@ def key(node): lambda t: t.a, lambda t: t.a + t.b, lambda t: t.a + t.b > 3**t.a, - lambda t: t[(t.a + t.b * 2 * t.b / t.b**3 > 4) & (t.b > 5)], - lambda t: t[(t.a + t.b * 2 * t.b / t.b**3 > 4) & (t.b > 5)] + lambda t: t.filter((t.a + t.b * 2 * t.b / t.b**3 > 4) & (t.b > 5)), + lambda t: t.filter((t.a + t.b * 2 * t.b / t.b**3 > 4) & (t.b > 5)) .group_by("c") .aggregate(amean=lambda f: f.a.mean(), bsum=lambda f: f.b.sum()), ], @@ -86,7 +86,7 @@ def test_join(how): left = ibis.table([("a", "int64"), ("b", "string")]) right = ibis.table([("b", "string"), ("c", "int64")]) joined = left.join(right, left.b == right.b, how=how) - result = joined[left.a, right.c] + result = joined.select(left.a, right.c) graph = viz.to_graph(result) assert key(result.op()) in graph.source @@ -134,7 +134,7 @@ def test_asof_join(): right = right.mutate(foo=1) joined = api.asof_join(left, right, "time") - result = joined[left, right.foo] + result = joined.select(left, right.foo) graph = viz.to_graph(result) assert key(result.op()) in graph.source diff --git a/ibis/expr/types/relations.py b/ibis/expr/types/relations.py index d0fe76d10aa1..56745f33be7c 100644 --- a/ibis/expr/types/relations.py +++ b/ibis/expr/types/relations.py @@ -3,6 +3,7 @@ import itertools import operator import re +import warnings from collections import deque from collections.abc import Callable, Iterable, Iterator, Mapping, Sequence from keyword import iskeyword @@ -559,16 +560,16 @@ def preview( console_width=console_width, ) - def __getitem__(self, what): - """Select items from a table expression. - - This method implements square bracket syntax for table expressions, - including various forms of projection and filtering. + def __getitem__(self, what: str | int | slice | Sequence[str | int]): + """Select one or more columns or rows from a table expression. Parameters ---------- what - Selection object. This can be a variety of types including strings, ints, lists. + What to select. Options are: + - A `str` column name or `int` column index to select a single column. + - A sequence of column names or indices to select multiple columns. + - A slice to select a subset of rows. 
Returns ------- @@ -579,10 +580,8 @@ def __getitem__(self, what): Examples -------- >>> import ibis - >>> import ibis.selectors as s - >>> from ibis import _ >>> ibis.options.interactive = True - >>> t = ibis.examples.penguins.fetch() + >>> t = ibis.examples.penguins.fetch().head() >>> t ┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━┓ ┃ species ┃ island ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ … ┃ @@ -594,15 +593,9 @@ def __getitem__(self, what): │ Adelie │ Torgersen │ 40.3 │ 18.0 │ 195 │ … │ │ Adelie │ Torgersen │ NULL │ NULL │ NULL │ … │ │ Adelie │ Torgersen │ 36.7 │ 19.3 │ 193 │ … │ - │ Adelie │ Torgersen │ 39.3 │ 20.6 │ 190 │ … │ - │ Adelie │ Torgersen │ 38.9 │ 17.8 │ 181 │ … │ - │ Adelie │ Torgersen │ 39.2 │ 19.6 │ 195 │ … │ - │ Adelie │ Torgersen │ 34.1 │ 18.1 │ 193 │ … │ - │ Adelie │ Torgersen │ 42.0 │ 20.2 │ 190 │ … │ - │ … │ … │ … │ … │ … │ … │ └─────────┴───────────┴────────────────┴───────────────┴───────────────────┴───┘ - Return a column by name + Select a single column by name: >>> t["island"] ┏━━━━━━━━━━━┓ @@ -615,15 +608,9 @@ def __getitem__(self, what): │ Torgersen │ │ Torgersen │ │ Torgersen │ - │ Torgersen │ - │ Torgersen │ - │ Torgersen │ - │ Torgersen │ - │ Torgersen │ - │ … │ └───────────┘ - Return the second column, starting from index 0 + Select a single column by index: >>> t.columns[1] 'island' @@ -638,105 +625,11 @@ def __getitem__(self, what): │ Torgersen │ │ Torgersen │ │ Torgersen │ - │ Torgersen │ - │ Torgersen │ - │ Torgersen │ - │ Torgersen │ - │ Torgersen │ - │ … │ └───────────┘ - Extract a range of rows - - >>> t[:2] - ┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━┓ - ┃ species ┃ island ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ … ┃ - ┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━┩ - │ string │ string │ float64 │ float64 │ int64 │ … │ - ├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼───┤ - │ Adelie │ Torgersen │ 39.1 │ 18.7 │ 181 │ … │ - │ Adelie │ Torgersen │ 39.5 │ 17.4 │ 186 │ … │ - └─────────┴───────────┴────────────────┴───────────────┴───────────────────┴───┘ - >>> t[:5] - ┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━┓ - ┃ species ┃ island ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ … ┃ - ┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━┩ - │ string │ string │ float64 │ float64 │ int64 │ … │ - ├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼───┤ - │ Adelie │ Torgersen │ 39.1 │ 18.7 │ 181 │ … │ - │ Adelie │ Torgersen │ 39.5 │ 17.4 │ 186 │ … │ - │ Adelie │ Torgersen │ 40.3 │ 18.0 │ 195 │ … │ - │ Adelie │ Torgersen │ NULL │ NULL │ NULL │ … │ - │ Adelie │ Torgersen │ 36.7 │ 19.3 │ 193 │ … │ - └─────────┴───────────┴────────────────┴───────────────┴───────────────────┴───┘ - >>> t[2:5] - ┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━┓ - ┃ species ┃ island ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ … ┃ - ┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━┩ - │ string │ string │ float64 │ float64 │ int64 │ … │ - ├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼───┤ - │ Adelie │ Torgersen │ 40.3 │ 18.0 │ 195 │ … │ - │ Adelie │ Torgersen │ NULL │ NULL │ NULL │ … │ - │ Adelie │ Torgersen │ 36.7 │ 19.3 │ 193 │ … │ - └─────────┴───────────┴────────────────┴───────────────┴───────────────────┴───┘ + Select 
multiple columns by name: - Some backends support negative slice indexing - - >>> t[-5:] # last 5 rows - ┏━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━┓ - ┃ species ┃ island ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ … ┃ - ┡━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━┩ - │ string │ string │ float64 │ float64 │ int64 │ … │ - ├───────────┼────────┼────────────────┼───────────────┼───────────────────┼───┤ - │ Chinstrap │ Dream │ 55.8 │ 19.8 │ 207 │ … │ - │ Chinstrap │ Dream │ 43.5 │ 18.1 │ 202 │ … │ - │ Chinstrap │ Dream │ 49.6 │ 18.2 │ 193 │ … │ - │ Chinstrap │ Dream │ 50.8 │ 19.0 │ 210 │ … │ - │ Chinstrap │ Dream │ 50.2 │ 18.7 │ 198 │ … │ - └───────────┴────────┴────────────────┴───────────────┴───────────────────┴───┘ - >>> t[-5:-3] # last 5th to 3rd rows - ┏━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━┓ - ┃ species ┃ island ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ … ┃ - ┡━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━┩ - │ string │ string │ float64 │ float64 │ int64 │ … │ - ├───────────┼────────┼────────────────┼───────────────┼───────────────────┼───┤ - │ Chinstrap │ Dream │ 55.8 │ 19.8 │ 207 │ … │ - │ Chinstrap │ Dream │ 43.5 │ 18.1 │ 202 │ … │ - └───────────┴────────┴────────────────┴───────────────┴───────────────────┴───┘ - >>> t[2:-2] # chop off the first two and last two rows - ┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━┓ - ┃ species ┃ island ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ … ┃ - ┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━┩ - │ string │ string │ float64 │ float64 │ int64 │ … │ - ├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼───┤ - │ Adelie │ Torgersen │ 40.3 │ 18.0 │ 195 │ … │ - │ Adelie │ Torgersen │ NULL │ NULL │ NULL │ … │ - │ Adelie │ Torgersen │ 36.7 │ 19.3 │ 193 │ … │ - │ Adelie │ Torgersen │ 39.3 │ 20.6 │ 190 │ … │ - │ Adelie │ Torgersen │ 38.9 │ 17.8 │ 181 │ … │ - │ Adelie │ Torgersen │ 39.2 │ 19.6 │ 195 │ … │ - │ Adelie │ Torgersen │ 34.1 │ 18.1 │ 193 │ … │ - │ Adelie │ Torgersen │ 42.0 │ 20.2 │ 190 │ … │ - │ Adelie │ Torgersen │ 37.8 │ 17.1 │ 186 │ … │ - │ Adelie │ Torgersen │ 37.8 │ 17.3 │ 180 │ … │ - │ … │ … │ … │ … │ … │ … │ - └─────────┴───────────┴────────────────┴───────────────┴───────────────────┴───┘ - - Select columns - - >>> t[["island", "bill_length_mm"]].head() - ┏━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┓ - ┃ island ┃ bill_length_mm ┃ - ┡━━━━━━━━━━━╇━━━━━━━━━━━━━━━━┩ - │ string │ float64 │ - ├───────────┼────────────────┤ - │ Torgersen │ 39.1 │ - │ Torgersen │ 39.5 │ - │ Torgersen │ 40.3 │ - │ Torgersen │ NULL │ - │ Torgersen │ 36.7 │ - └───────────┴────────────────┘ - >>> t["island", "bill_length_mm"].head() + >>> t[["island", "bill_length_mm"]] ┏━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┓ ┃ island ┃ bill_length_mm ┃ ┡━━━━━━━━━━━╇━━━━━━━━━━━━━━━━┩ @@ -748,37 +641,10 @@ def __getitem__(self, what): │ Torgersen │ NULL │ │ Torgersen │ 36.7 │ └───────────┴────────────────┘ - >>> t[_.island, _.bill_length_mm].head() - ┏━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┓ - ┃ island ┃ bill_length_mm ┃ - ┡━━━━━━━━━━━╇━━━━━━━━━━━━━━━━┩ - │ string │ float64 │ - ├───────────┼────────────────┤ - │ Torgersen │ 39.1 │ - │ Torgersen │ 39.5 │ - │ Torgersen │ 40.3 │ - │ Torgersen │ NULL │ - │ Torgersen │ 36.7 │ - └───────────┴────────────────┘ - - Filtering - >>> t[t.island.lower() != "torgersen"].head() - 
┏━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━┓ - ┃ species ┃ island ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ … ┃ - ┡━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━┩ - │ string │ string │ float64 │ float64 │ int64 │ … │ - ├─────────┼────────┼────────────────┼───────────────┼───────────────────┼───┤ - │ Adelie │ Biscoe │ 37.8 │ 18.3 │ 174 │ … │ - │ Adelie │ Biscoe │ 37.7 │ 18.7 │ 180 │ … │ - │ Adelie │ Biscoe │ 35.9 │ 19.2 │ 189 │ … │ - │ Adelie │ Biscoe │ 38.2 │ 18.1 │ 185 │ … │ - │ Adelie │ Biscoe │ 38.8 │ 17.2 │ 180 │ … │ - └─────────┴────────┴────────────────┴───────────────┴───────────────────┴───┘ + Select a range of rows: - Selectors - - >>> t[~s.numeric() | (s.numeric() & ~s.c("year"))].head() + >>> t[:2] ┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━┓ ┃ species ┃ island ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ … ┃ ┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━┩ @@ -786,45 +652,45 @@ def __getitem__(self, what): ├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼───┤ │ Adelie │ Torgersen │ 39.1 │ 18.7 │ 181 │ … │ │ Adelie │ Torgersen │ 39.5 │ 17.4 │ 186 │ … │ + └─────────┴───────────┴────────────────┴───────────────┴───────────────────┴───┘ + >>> t[2:5] + ┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━┓ + ┃ species ┃ island ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ … ┃ + ┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━┩ + │ string │ string │ float64 │ float64 │ int64 │ … │ + ├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼───┤ │ Adelie │ Torgersen │ 40.3 │ 18.0 │ 195 │ … │ │ Adelie │ Torgersen │ NULL │ NULL │ NULL │ … │ │ Adelie │ Torgersen │ 36.7 │ 19.3 │ 193 │ … │ └─────────┴───────────┴────────────────┴───────────────┴───────────────────┴───┘ - >>> t[s.r["bill_length_mm":"body_mass_g"]].head() - ┏━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ - ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃ - ┡━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ - │ float64 │ float64 │ int64 │ int64 │ - ├────────────────┼───────────────┼───────────────────┼─────────────┤ - │ 39.1 │ 18.7 │ 181 │ 3750 │ - │ 39.5 │ 17.4 │ 186 │ 3800 │ - │ 40.3 │ 18.0 │ 195 │ 3250 │ - │ NULL │ NULL │ NULL │ NULL │ - │ 36.7 │ 19.3 │ 193 │ 3450 │ - └────────────────┴───────────────┴───────────────────┴─────────────┘ """ from ibis.expr.types.logical import BooleanValue - if isinstance(what, slice): - limit, offset = util.slice_to_limit_offset(what, self.count()) - return self.limit(limit, offset=offset) - # skip the self.bind call for single column access with strings or ints - # because dereferencing has significant overhead - elif isinstance(what, str): + if isinstance(what, str): return ops.Field(self.op(), what).to_expr() elif isinstance(what, int): return ops.Field(self.op(), self.columns[what]).to_expr() + elif isinstance(what, slice): + limit, offset = util.slice_to_limit_offset(what, self.count()) + return self.limit(limit, offset=offset) args = [ self.columns[arg] if isinstance(arg, int) else arg for arg in util.promote_list(what) ] + if util.all_of(args, str): + return self.select(args) + + # Once this deprecation is removed, we'll want to error here instead. 
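+        # Anything that reaches this point is a deprecated form of selection:
+        # boolean filter expressions, deferred expressions, selectors, or
+        # sequences mixing column names with expressions.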
+ warnings.warn( + "Selecting/filtering arbitrary expressions in `Table.__getitem__` is " + "deprecated and will be removed in version 10.0. Please use " + "`Table.select` or `Table.filter` instead.", + FutureWarning, + ) values = self.bind(args) - if isinstance(what, (str, int)): - assert len(values) == 1 - return values[0] - elif util.all_of(values, BooleanValue): + if util.all_of(values, BooleanValue): return self.filter(values) else: return self.select(values) @@ -2923,7 +2789,7 @@ def unpack(self, *columns: str) -> Table: result_columns.extend(expr[field] for field in expr.names) else: result_columns.append(column) - return self[result_columns] + return self.select(result_columns) def info(self) -> Table: """Return summary information about a table. diff --git a/ibis/tests/benchmarks/test_benchmarks.py b/ibis/tests/benchmarks/test_benchmarks.py index 0ff9dee0db2e..ee5a76b8d160 100644 --- a/ibis/tests/benchmarks/test_benchmarks.py +++ b/ibis/tests/benchmarks/test_benchmarks.py @@ -53,7 +53,7 @@ def t(): def make_base(t): - return t[ + return t.filter( ( (t.year > 2016) | ((t.year == 2016) & (t.month > 6)) @@ -80,7 +80,7 @@ def make_base(t): & (t.minute <= 5) ) ) - ] + ) @pytest.fixture(scope="module") @@ -394,9 +394,9 @@ def tpc_h02(part, supplier, partsupp, nation, region): .join(region, nation.n_regionkey == region.r_regionkey) ) - subexpr = subexpr[ + subexpr = subexpr.filter( (subexpr.r_name == REGION) & (expr.p_partkey == subexpr.ps_partkey) - ] + ) filters = [ expr.p_size == SIZE, @@ -529,7 +529,7 @@ def eq(a, b): def multiple_joins(table, num_joins): for _ in range(num_joins): table = table.mutate(dummy=ibis.literal("")) - table = table.left_join(table.view(), ["dummy"])[[table]] + table = table.left_join(table.view(), ["dummy"]).select(table) @pytest.mark.parametrize("num_joins", [1, 10]) diff --git a/ibis/tests/expr/test_analysis.py b/ibis/tests/expr/test_analysis.py index 527bbab84c8f..6536d761a82d 100644 --- a/ibis/tests/expr/test_analysis.py +++ b/ibis/tests/expr/test_analysis.py @@ -17,7 +17,7 @@ def test_rewrite_join_projection_without_other_ops(con): table2 = con.table("star2") table3 = con.table("star3") - filtered = table[table["f"] > 0] + filtered = table.filter(table["f"] > 0) pred1 = table["foo_id"] == table2["foo_id"] pred2 = filtered["bar_id"] == table3["bar_id"] @@ -25,7 +25,7 @@ def test_rewrite_join_projection_without_other_ops(con): j1 = filtered.left_join(table2, [pred1]) j2 = j1.inner_join(table3, [pred2]) # Project out the desired fields - view = j2[[filtered, table2["value1"], table3["value2"]]] + view = j2.select(filtered, table2["value1"], table3["value2"]) with join_tables(j2) as (r1, r2, r3): # Construct the thing we expect to obtain @@ -90,11 +90,11 @@ def test_filter_on_projected_field(con): .join(orders, orders.o_custkey == customer.c_custkey) ) - tpch = all_join[fields_of_interest] + tpch = all_join.select(*fields_of_interest) # Correlated subquery, yikes! 
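+    # tpch.view() creates a distinct self-reference to the same table, so the
+    # filter-plus-mean below forms a correlated subquery against tpch.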
t2 = tpch.view() - conditional_avg = t2[(t2.region == tpch.region)].amount.mean() + conditional_avg = t2.filter(t2.region == tpch.region).amount.mean() # `amount` is part of the projection above as an aliased field amount_filter = tpch.amount > conditional_avg @@ -116,7 +116,7 @@ def test_join_predicate_from_derived_raises(): table2 = ibis.table([("key", "string"), ("value", "double")], "bar_table") filter_pred = table["f"] > 0 - table3 = table[filter_pred] + table3 = table.filter(filter_pred) with pytest.raises(com.IntegrityError, match="they belong to another relation"): # TODO(kszucs): could be smarter actually and rewrite the predicate @@ -153,8 +153,8 @@ def test_filter_self_join(): metrics={"total": purchases.amount.sum()}, ) - left = agged[agged.kind == "foo"] - right = agged[agged.kind == "bar"] + left = agged.filter(agged.kind == "foo") + right = agged.filter(agged.kind == "bar") assert left.op() == ops.Filter( parent=agged, predicates=[agged.kind == "foo"], @@ -186,11 +186,13 @@ def test_filter_self_join(): def test_is_ancestor_analytic(): x = ibis.table(ibis.schema([("col", "int32")]), "x") - with_filter_col = x[x.columns + [ibis.null().name("filter")]] - filtered = with_filter_col[with_filter_col["filter"].isnull()] - subquery = filtered[filtered.columns] + with_filter_col = x.select(x.columns + [ibis.null().name("filter")]) + filtered = with_filter_col.filter(with_filter_col["filter"].isnull()) + subquery = filtered.select(filtered.columns) - with_analytic = subquery[subquery.columns + [subquery.count().name("analytic")]] + with_analytic = subquery.select( + subquery.columns + [subquery.count().name("analytic")] + ) assert not subquery.op().equals(with_analytic.op()) @@ -252,10 +254,10 @@ def test_select_filter_mutate_fusion(): t = ibis.table(ibis.schema([("col", "float32")]), "t") - t1 = t[["col"]] + t1 = t.select("col") assert t1.op() == ops.Project(parent=t, values={"col": t.col}) - t2 = t1[t1["col"].isnan()] + t2 = t1.filter(t1["col"].isnan()) assert t2.op() == ops.Filter(parent=t1, predicates=[t1.col.isnan()]) t3 = t2.mutate(col=t2["col"].cast("int32")) diff --git a/ibis/tests/expr/test_analytics.py b/ibis/tests/expr/test_analytics.py index c6c57c3b2e5a..ab2f6a17b7af 100644 --- a/ibis/tests/expr/test_analytics.py +++ b/ibis/tests/expr/test_analytics.py @@ -44,7 +44,7 @@ def test_category_project(alltypes): t = alltypes tier = t.double_col.bucket([0, 50, 100]).name("tier") - expr = t[tier, t] + expr = t.select(tier, t) assert isinstance(expr.tier, ir.IntegerColumn) @@ -99,7 +99,7 @@ def test_histogram(alltypes): def test_topk_analysis_bug(airlines): # GH #398 dests = ["ORD", "JFK", "SFO"] - t = airlines[airlines.dest.isin(dests)] + t = airlines.filter(airlines.dest.isin(dests)) filtered = t.semi_join(t.origin.topk(10, by=t.arrdelay.mean()), "origin") assert filtered is not None diff --git a/ibis/tests/expr/test_case.py b/ibis/tests/expr/test_case.py index dbd0b9d21746..97bfcba5d664 100644 --- a/ibis/tests/expr/test_case.py +++ b/ibis/tests/expr/test_case.py @@ -211,7 +211,7 @@ def test_case_mixed_type(): expr = ( t0.three.case().when(0, "low").when(1, "high").else_("null").end().name("label") ) - result = t0[expr] + result = t0.select(expr) assert result["label"].type().equals(dt.string) diff --git a/ibis/tests/expr/test_format_sql_operations.py b/ibis/tests/expr/test_format_sql_operations.py index 4025aa11cb52..500866ad86b2 100644 --- a/ibis/tests/expr/test_format_sql_operations.py +++ b/ibis/tests/expr/test_format_sql_operations.py @@ -31,7 +31,7 @@ def 
test_memoize_database_table(con, snapshot): table2 = con.table("test2") filter_pred = table["f"] > 0 - table3 = table[filter_pred] + table3 = table.filter(filter_pred) join_pred = table3["g"] == table2["key"] joined = table2.inner_join(table3, [join_pred]) @@ -56,7 +56,7 @@ def test_memoize_insert_sort_key(con, snapshot): dest_avg=t.arrdelay.mean(), dev=t.arrdelay - t.arrdelay.mean() ) - worst = expr[expr.dev.notnull()].order_by(ibis.desc("dev")).limit(10) + worst = expr.filter(expr.dev.notnull()).order_by(ibis.desc("dev")).limit(10) result = repr(worst) assert result.count("airlines") == 1 diff --git a/ibis/tests/expr/test_struct.py b/ibis/tests/expr/test_struct.py index 92013d6c6390..9b6d8914dff6 100644 --- a/ibis/tests/expr/test_struct.py +++ b/ibis/tests/expr/test_struct.py @@ -62,11 +62,11 @@ def test_struct_pickle(): def test_lift(t): - assert t.a.lift().equals(t[_.a.b, _.a.c]) + assert t.a.lift().equals(t.select(_.a.b, _.a.c)) def test_unpack_from_table(t): - assert t.unpack("a").equals(t[_.a.b, _.a.c, _.d]) + assert t.unpack("a").equals(t.select(_.a.b, _.a.c, _.d)) def test_lift_join(t, s): @@ -86,7 +86,7 @@ def test_lift_join(t, s): def test_unpack_join_from_table(t, s): join = t.join(s, t.d == s.a.g) result = join.unpack("a_right") - expected = join[_.a, _.d, _.a_right.f, _.a_right.g] + expected = join.select(_.a, _.d, _.a_right.f, _.a_right.g) assert result.equals(expected) diff --git a/ibis/tests/expr/test_table.py b/ibis/tests/expr/test_table.py index ba9e7b218001..6d802403dca9 100644 --- a/ibis/tests/expr/test_table.py +++ b/ibis/tests/expr/test_table.py @@ -97,7 +97,7 @@ def test_getitem_column_select(table): def test_select_using_selector(table): - expr = table[s.numeric()] + expr = table.select(s.numeric()) expected = table.select( table.a, table.b, @@ -124,9 +124,8 @@ def test_getitem_attribute(table): result = table.a assert_equal(result, table["a"]) - # Project and add a name that conflicts with a Table built-in - # attribute - view = table[[table, table["a"].name("schema")]] + # Project and add a name that conflicts with a Table built-in attribute + view = table.mutate(schema=table.a) assert not isinstance(view.schema, Column) @@ -176,7 +175,7 @@ def test_projection_with_exprs(table): col_exprs = [table["b"].log().name("log_b"), mean_diff.name("mean_diff")] - proj = table[col_exprs + ["g"]] + proj = table.select(col_exprs + ["g"]) schema = proj.schema() assert schema.names == ("log_b", "mean_diff", "g") assert schema.types == (dt.double, dt.double, dt.string) @@ -219,7 +218,7 @@ def test_projection_with_star_expr(table): t = table # it lives! 
- proj = t[t, new_expr] + proj = t.select(t, new_expr) repr(proj) ex_names = table.schema().names + ("bigger_a",) @@ -228,14 +227,35 @@ def test_projection_with_star_expr(table): # cannot pass an invalid table expression t2 = t.aggregate([t["a"].sum().name("sum(a)")], by=["g"]) with pytest.raises(IntegrityError): - t[[t2]] + t.select(t2) # TODO: there may be some ways this can be invalid -def test_projection_convenient_syntax(table): - proj = table[table, table["a"].name("foo")] - proj2 = table[[table, table["a"].name("foo")]] - assert_equal(proj, proj2) +def test_deprecated_getitem_select_filter(table): + # Select + sol1 = table.select(table, table.a.name("foo")) + with pytest.warns(FutureWarning): + e1 = table[table, table["a"].name("foo")] + e2 = table[[table, table["a"].name("foo")]] + + assert_equal(e1, sol1) + assert_equal(e2, sol1) + + # Select with selector + sol2 = table.select(s.numeric()) + with pytest.warns(FutureWarning): + e3 = table[s.numeric()] + + assert_equal(e3, sol2) + + # Filter + sol3 = table.filter(table.a > 10, table.a < 20) + with pytest.warns(FutureWarning): + e4 = table[table.a > 10, table.a < 20] + e5 = table[[table.a > 10, table.a < 20]] + + assert_equal(e4, sol3) + assert_equal(e5, sol3) def test_projection_mutate_analysis_bug(con): @@ -243,7 +263,7 @@ def test_projection_mutate_analysis_bug(con): t = con.table("airlines") - filtered = t[t.depdelay.notnull()] + filtered = t.filter(t.depdelay.notnull()) leg = ibis.literal("-").join([t.origin, t.dest]) mutated = filtered.mutate(leg=leg) @@ -251,19 +271,6 @@ def test_projection_mutate_analysis_bug(con): mutated["year", "month", "day", "depdelay", "leg"] -def test_projection_self(table): - result = table[table] - expected = table.select(table) - - assert_equal(result, expected) - - -def test_projection_array_expr(table): - result = table[table.a] - expected = table[[table.a]] - assert_equal(result, expected) - - @pytest.mark.parametrize("empty", [list(), dict()]) def test_projection_no_expr(table, empty): with pytest.raises(com.IbisTypeError, match="must select at least one"): @@ -299,7 +306,7 @@ def test_mutate(table): kw5=ibis.literal(9), kw6=ibis.literal("ten"), ) - expected = table[ + expected = table.select( table, (table.a + 1).name("x1"), table.b.sum().name("x2"), @@ -313,7 +320,7 @@ def test_mutate(table): (table.a + 8).name("kw4"), ibis.literal(9).name("kw5"), ibis.literal("ten").name("kw6"), - ] + ) assert_equal(expr, expected) @@ -322,7 +329,7 @@ def test_mutate_alter_existing_columns(table): foo = table.d * 2 expr = table.mutate(f=new_f, foo=foo) - expected = table[ + expected = table.select( "a", "b", "c", @@ -335,7 +342,7 @@ def test_mutate_alter_existing_columns(table): "j", "k", foo.name("foo"), - ] + ) assert_equal(expr, expected) @@ -345,25 +352,11 @@ def test_replace_column(): expr = tb.b.cast("int32") tb2 = tb.mutate(b=expr) - expected = tb[tb.a, expr.name("b"), tb.c] + expected = tb.select(tb.a, expr.name("b"), tb.c) assert_equal(tb2, expected) -def test_filter_no_list(table): - pred = table.a > 5 - - result = table.filter(pred) - expected = table[pred] - assert_equal(result, expected) - - -def test_add_predicate(table): - pred = table["a"] > 5 - result = table[pred] - assert isinstance(result.op(), ops.Filter) - - def test_invalid_predicate(table, schema): # a lookalike table2 = api.table(schema, name="bar") @@ -379,12 +372,12 @@ def test_add_predicate_coalesce(table): pred1 = table["a"] > 5 pred2 = table["b"] > 0 - result = simplify(table[pred1][pred2].op()).to_expr() + result = 
simplify(table.filter(pred1).filter(pred2).op()).to_expr() expected = table.filter([pred1, pred2]) assert_equal(result, expected) # 59, if we are not careful, we can obtain broken refs - subset = table[pred1] + subset = table.filter(pred1) result = simplify(subset.filter([subset["b"] > 0]).op()).to_expr() assert_equal(result, expected) @@ -392,7 +385,7 @@ def test_add_predicate_coalesce(table): def test_repr_same_but_distinct_objects(con): t = con.table("test1") t_copy = con.table("test1") - table2 = t[t_copy["f"] > 0] + table2 = t.filter(t_copy["f"] > 0) result = repr(table2) assert result.count("DatabaseTable") == 1 @@ -402,10 +395,10 @@ def test_filter_fusion_distinct_table_objects(con): t = con.table("test1") tt = con.table("test1") - expr = t[t.f > 0][t.c > 0] - expr2 = t[t.f > 0][tt.c > 0] - expr3 = t[tt.f > 0][tt.c > 0] - expr4 = t[tt.f > 0][t.c > 0] + expr = t.filter(t.f > 0).filter(t.c > 0) + expr2 = t.filter(t.f > 0).filter(tt.c > 0) + expr3 = t.filter(tt.f > 0).filter(tt.c > 0) + expr4 = t.filter(tt.f > 0).filter(t.c > 0) assert_equal(expr, expr2) assert repr(expr) == repr(expr2) @@ -1095,18 +1088,6 @@ def test_join_combo_with_projection(table): repr(proj) -def test_join_getitem_projection(con): - region = con.table("tpch_region") - nation = con.table("tpch_nation") - - pred = region.r_regionkey == nation.n_regionkey - joined = region.inner_join(nation, pred) - - result = joined[nation] - expected = joined.select(nation) - assert_equal(result, expected) - - def test_self_join(table): # Self-joins are problematic with this design because column # expressions may reference either the left or right For example: @@ -1127,7 +1108,7 @@ def test_self_join(table): joined = left.inner_join(right, [right["g"] == left["g"]]) # Project out left table schema - proj = joined[[left]] + proj = joined.select(left) assert_equal(proj.schema(), left.schema()) # Try aggregating on top of joined @@ -1148,18 +1129,6 @@ def test_self_join_no_view_convenience(table): assert result.columns == expected_cols -def test_join_reference_bug(con): - # GH#403 - orders = con.table("tpch_orders") - customer = con.table("tpch_customer") - lineitem = con.table("tpch_lineitem") - - items = orders.join(lineitem, orders.o_orderkey == lineitem.l_orderkey)[ - lineitem, orders.o_custkey, orders.o_orderpriority - ].join(customer, [("o_custkey", "c_custkey")]) - items["o_orderpriority"].value_counts() - - def test_join_project_after(table): # e.g. 
# @@ -1494,21 +1463,12 @@ def test_unresolved_existence_predicate(t1, t2): filtered = t2.filter(t1.key1 == t2.key1) subquery = ops.ExistsSubquery(filtered) expected = ops.Filter(parent=t1, predicates=[subquery]) - assert t1[expr].op() == expected + assert t1.filter(expr).op() == expected filtered = t1.filter(t1.key1 == t2.key1) subquery = ops.ExistsSubquery(filtered) expected = ops.Filter(parent=t2, predicates=[subquery]) - assert t2[expr].op() == expected - - -def test_resolve_existence_predicate(t1, t2): - expr = t1[(t1.key1 == t2.key1).any()] - op = expr.op() - assert isinstance(op, ops.Filter) - - pred = op.predicates[0].to_expr() - assert isinstance(pred.op(), ops.ExistsSubquery) + assert t2.filter(expr).op() == expected def test_aggregate_metrics(table): @@ -1564,11 +1524,8 @@ def test_filter(table): m = table.mutate(foo=table.f * 2, bar=table.e / 2) result = m.filter(lambda x: x.foo > 10) - result2 = m[lambda x: x.foo > 10] - expected = m[m.foo > 10] - + expected = m.filter(m.foo > 10) assert_equal(result, expected) - assert_equal(result2, expected) result = m.filter([lambda x: x.foo > 10, lambda x: x.bar < 0]) expected = m.filter([m.foo > 10, m.bar < 0]) @@ -1602,10 +1559,8 @@ def f(x): return (x.foo * 2).name("bar") result = m.select([f, "f"]) - result2 = m[f, "f"] expected = m.select([f(m), "f"]) assert_equal(result, expected) - assert_equal(result2, expected) def test_mutate2(table): @@ -1774,20 +1729,14 @@ def test_merge_as_of_allows_overlapping_columns(): name="t", ) - signal_one = table[ + signal_one = table.filter( table["field"].contains("signal_one") & table["field"].contains("current") - ] - signal_one = signal_one[ - "value", "timestamp_received", "field" - ] # select columns we care about + )["value", "timestamp_received", "field"] signal_one = signal_one.rename(current="value", signal_one="field") - signal_two = table[ + signal_two = table.filter( table["field"].contains("signal_two") & table["field"].contains("voltage") - ] - signal_two = signal_two[ - "value", "timestamp_received", "field" - ] # select columns we care about + )["value", "timestamp_received", "field"] signal_two = signal_two.rename(voltage="value", signal_two="field") merged = signal_one.asof_join(signal_two, "timestamp_received") @@ -1806,7 +1755,7 @@ def test_select_from_unambiguous_join_with_strings(): t = ibis.table([("a", "int64"), ("b", "string")]) s = ibis.table([("b", "int64"), ("c", "string")]) joined = t.left_join(s, [t.b == s.c]) - expr = joined[t, "c"] + expr = joined.select(t, "c") assert expr.columns == ["a", "b", "c"] @@ -1913,12 +1862,6 @@ def test_default_backend_with_unbound_table(): assert expr.execute() -def test_array_string_compare(): - t = ibis.table(schema=dict(by="string", words="array"), name="t") - expr = t[t.by == "foo"].mutate(words=_.words.unnest()).filter(_.words == "the") - assert expr is not None - - @pytest.mark.parametrize("value", [True, False]) def test_filter_with_literal(value): t = ibis.table(dict(a="string")) diff --git a/ibis/tests/expr/test_value_exprs.py b/ibis/tests/expr/test_value_exprs.py index cc5a756437e5..e95bda04d864 100644 --- a/ibis/tests/expr/test_value_exprs.py +++ b/ibis/tests/expr/test_value_exprs.py @@ -291,7 +291,7 @@ def test_isin_notin_list(table, container): def test_value_counts(table, string_col): bool_clause = table[string_col].notin(["1", "4", "7"]) - expr = table[bool_clause][string_col].value_counts() + expr = table.filter(bool_clause)[string_col].value_counts() assert isinstance(expr, ir.Table) @@ -1362,7 +1362,7 @@ def 
test_select_on_unambiguous_join(join_method): def test_chained_select_on_join(): t = ibis.table([("a", dt.int64)], name="t") s = ibis.table([("a", dt.int64), ("b", dt.string)], name="s") - join = t.join(s)[t.a, s.b] + join = t.join(s).select(t.a, s.b) expr1 = join["a", "b"] expr2 = join.select(["a", "b"]) assert expr1.equals(expr2) @@ -1376,7 +1376,7 @@ def test_repr_list_of_lists(): def test_repr_list_of_lists_in_table(): t = ibis.table([("a", "int64")], name="t") lit = ibis.literal([[1]]) - expr = t[t, lit.name("array_of_array")] + expr = t.select(t, lit.name("array_of_array")) repr(expr) @@ -1504,7 +1504,7 @@ def test_deferred_r_ops(op_name, expected_left, expected_right): right = _.a op = getattr(operator, op_name) - expr = t[op(left, right).name("b")] + expr = t.select(op(left, right).name("b")) node = expr.op().values["b"] assert node.left.equals(expected_left(t).op()) assert node.right.equals(expected_right(t).op()) @@ -1675,7 +1675,7 @@ def test_rowid_only_physical_tables(): table = ibis.table({"x": "int", "y": "string"}, name="t") table.rowid() # works - table[table.rowid(), table.x].filter(_.x > 10) # works + table.select(table.rowid(), table.x).filter(_.x > 10) # works with pytest.raises(com.IbisTypeError, match="only valid for physical tables"): table.filter(table.x > 0).rowid() diff --git a/ibis/tests/expr/test_window_frames.py b/ibis/tests/expr/test_window_frames.py index 5f6ac6dd0fb6..7477544f3da9 100644 --- a/ibis/tests/expr/test_window_frames.py +++ b/ibis/tests/expr/test_window_frames.py @@ -511,7 +511,9 @@ def metric(x): return x.arrdelay.mean().name("avg_delay") annual_delay = ( - t[t.dest.isin(["JFK", "SFO"])].group_by(["dest", "year"]).aggregate(metric) + t.filter(t.dest.isin(["JFK", "SFO"])) + .group_by(["dest", "year"]) + .aggregate(metric) ) what = annual_delay.group_by("dest") enriched = what.mutate(grand_avg=annual_delay.avg_delay.mean()) @@ -521,7 +523,7 @@ def metric(x): .name("grand_avg") .over(ibis.window(group_by=annual_delay.dest)) ) - expected = annual_delay[annual_delay, expr] + expected = annual_delay.select(annual_delay, expr) assert enriched.equals(expected)
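
For reference, a minimal sketch of the migration implied by this series; the
table and column names are illustrative only, and the supported versus
deprecated forms follow the new `Table.__getitem__` implementation above:

    import ibis

    t = ibis.table({"a": "int64", "b": "string"}, name="t")

    # Still supported by __getitem__:
    t["a"]         # single column by name
    t[0]           # single column by index
    t[["a", "b"]]  # multiple columns by name
    t[:10]         # row slice, equivalent to t.limit(10)

    # Deprecated (warns FutureWarning, removal planned for 10.0):
    t[t.a > 0]                   # use t.filter(t.a > 0)
    t[t.a, (t.a + 1).name("c")]  # use t.select(t.a, (t.a + 1).name("c"))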