Skip to content

Commit

Permalink
refactor(api): remove explicit use of `.projection` in favor of the shorter `.select`
Browse files Browse the repository at this point in the history
  • Loading branch information
cpcloud authored and kszucs committed Apr 6, 2023
1 parent 41b2949 commit 73df8df
Show file tree
Hide file tree
Showing 25 changed files with 82 additions and 91 deletions.
2 changes: 1 addition & 1 deletion docs/ibis-for-sql-programmers.ipynb

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion ibis/backends/base/sql/alchemy/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ def _exists_subquery(t, op):
filtered = (
op.foreign_table.to_expr()
.filter([pred.to_expr() for pred in op.predicates])
.projection([ir.literal(1).name("")])
.select(ir.literal(1).name(""))
)

sub_ctx = ctx.subcontext()
Expand Down
4 changes: 2 additions & 2 deletions ibis/backends/base/sql/registry/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ def table_column(translator, op):
# context, we should format as a subquery
if translator.permit_subquery and ctx.is_foreign_expr(op.table):
# TODO(kszucs): avoid the expression roundtrip
proj_expr = op.table.to_expr().projection([op.name]).to_array().op()
proj_expr = op.table.to_expr().select([op.name]).to_array().op()
return table_array_view(translator, proj_expr)

alias = ctx.get_ref(op.table, search_parents=True)
Expand All @@ -168,7 +168,7 @@ def exists_subquery(translator, op):
filtered = op.foreign_table.to_expr().filter(
[pred.to_expr() for pred in op.predicates]
)
node = filtered.projection([dummy]).op()
node = filtered.select(dummy).op()

subquery = ctx.get_compiled_expr(node)

Expand Down
2 changes: 1 addition & 1 deletion ibis/backends/clickhouse/tests/test_operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ def test_negate(con, alltypes, translate, column, operator):
)
def test_negate_non_boolean(alltypes, field, df):
t = alltypes.limit(10)
expr = t.projection([(-t[field]).name(field)])
expr = t.select((-t[field]).name(field))
result = expr.execute()[field]
expected = -df.head(10)[field]
tm.assert_series_equal(result, expected)
Expand Down
2 changes: 1 addition & 1 deletion ibis/backends/clickhouse/tests/test_select.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,7 @@ def test_filter_predicates(diamonds):

expr = diamonds
for pred in predicates:
expr = expr[pred(expr)].projection([expr])
expr = expr[pred(expr)].select(expr)

expr.execute()

Expand Down
12 changes: 5 additions & 7 deletions ibis/backends/dask/tests/execution/test_arrays.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,10 @@


def test_array_length(t):
expr = t.projection(
[
t.array_of_float64.length().name('array_of_float64_length'),
t.array_of_int64.length().name('array_of_int64_length'),
t.array_of_strings.length().name('array_of_strings_length'),
]
expr = t.select(
t.array_of_float64.length().name('array_of_float64_length'),
t.array_of_int64.length().name('array_of_int64_length'),
t.array_of_strings.length().name('array_of_strings_length'),
)
result = expr.compile()
expected = dd.from_pandas(
Expand Down Expand Up @@ -173,7 +171,7 @@ def test_array_index_scalar(client, index):
@pytest.mark.parametrize('n', [1, 3, 4, 7, -2]) # negative returns empty list
@pytest.mark.parametrize('mul', [lambda x, n: x * n, lambda x, n: n * x])
def test_array_repeat(t, df, n, mul):
expr = t.projection([mul(t.array_of_strings, n).name('repeated')])
expr = t.select(repeated=mul(t.array_of_strings, n))
result = expr.execute()
expected = pd.DataFrame({'repeated': df.array_of_strings * n})
tm.assert_frame_equal(result, expected)
Expand Down
2 changes: 1 addition & 1 deletion ibis/backends/impala/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,7 @@ def insert(
if partition is not None:
partition_schema = self.partition_schema()
partition_schema_names = frozenset(partition_schema.names)
expr = expr.projection(
expr = expr.select(
[
column
for column in expr.columns
Expand Down
4 changes: 2 additions & 2 deletions ibis/backends/impala/tests/test_exprs.py
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,7 @@ def test_filter_predicates(con):

expr = t
for pred in predicates:
expr = expr[pred(expr)].projection([expr])
expr = expr[pred(expr)].select(expr)

expr.execute()

Expand Down Expand Up @@ -672,7 +672,7 @@ def test_identical_to(con, left, right, expected):

def test_not(alltypes):
t = alltypes.limit(10)
expr = t.projection([(~t.double_col.isnull()).name('double_col')])
expr = t.select(double_col=~t.double_col.isnull())
result = expr.execute().double_col
expected = ~t.execute().double_col.isnull()
tm.assert_series_equal(result, expected)
Expand Down
10 changes: 5 additions & 5 deletions ibis/backends/impala/tests/test_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def limit_cte_extract(con):
alltypes = con.table('functional_alltypes')
t = alltypes.limit(100)
t2 = t.view()
return t.join(t2).projection(t)
return t.join(t2).select(t)


@pytest.mark.parametrize(
Expand All @@ -52,7 +52,7 @@ def test_nested_join_base(snapshot):
t = ibis.table(dict(uuid='string', ts='timestamp'), name='t')
counts = t.group_by('uuid').size()
max_counts = counts.group_by('uuid').aggregate(max_count=lambda x: x['count'].max())
result = max_counts.left_join(counts, 'uuid').projection([counts])
result = max_counts.left_join(counts, 'uuid').select(counts)
compiled_result = ImpalaCompiler.to_sql(result)
snapshot.assert_match(compiled_result, "out.sql")

Expand All @@ -68,10 +68,10 @@ def test_nested_joins_single_cte(snapshot):

main_kw = max_counts.left_join(
counts, ['uuid', max_counts.max_count == counts['count']]
).projection([counts])
).select(counts)

result = main_kw.left_join(last_visit, 'uuid').projection(
[main_kw, last_visit.last_visit]
result = main_kw.left_join(last_visit, 'uuid').select(
main_kw, last_visit.last_visit
)
compiled_result = ImpalaCompiler.to_sql(result)
snapshot.assert_match(compiled_result, "out.sql")
Expand Down
14 changes: 7 additions & 7 deletions ibis/backends/impala/tests/test_window.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def test_window_frame_specs(alltypes, window, snapshot):
t = alltypes

w2 = window.order_by(t.f)
expr = t.projection([t.d.sum().over(w2).name('foo')])
expr = t.select(foo=t.d.sum().over(w2))
assert_sql_equal(expr, snapshot)


Expand All @@ -83,8 +83,8 @@ def test_cumulative_functions(alltypes, name, snapshot):
expr = cumfunc().over(w).name("foo")
expected = func().over(ibis.cumulative_window(order_by=t.d)).name("foo")

expr1 = t.projection(expr)
expr2 = t.projection(expected)
expr1 = t.select(expr)
expr2 = t.select(expected)

assert_sql_equal(expr1, snapshot, "out1.sql")
assert_sql_equal(expr2, snapshot, "out2.sql")
Expand All @@ -95,7 +95,7 @@ def test_nested_analytic_function(alltypes, snapshot):

w = window(order_by=t.f)
expr = (t.f - t.f.lag()).lag().over(w).name('foo')
result = t.projection([expr])
result = t.select(expr)
assert_sql_equal(result, snapshot)


Expand All @@ -112,7 +112,7 @@ def test_multiple_windows(alltypes, snapshot):
w = window(group_by=t.g)

expr = t.f.sum().over(w) - t.f.sum()
proj = t.projection([t.g, expr.name('result')])
proj = t.select(t.g, result=expr)

assert_sql_equal(proj, snapshot)

Expand Down Expand Up @@ -154,7 +154,7 @@ def test_unsupported_aggregate_functions(alltypes, column, op):
t = alltypes
w = ibis.window(order_by=t.d)
expr = getattr(t[column], op)()
proj = t.projection([expr.over(w).name('foo')])
proj = t.select(foo=expr.over(w))
with pytest.raises(com.TranslationError):
ImpalaCompiler.to_sql(proj)

Expand All @@ -172,5 +172,5 @@ def test_propagate_nested_windows(alltypes, snapshot):
ex_expr = (t.f - t.f.lag().over(w)).lag().over(w)
assert_equal(result, ex_expr)

expr = t.projection(col.over(w).name('foo'))
expr = t.select(col.over(w).name('foo'))
assert_sql_equal(expr, snapshot)
10 changes: 4 additions & 6 deletions ibis/backends/pandas/tests/execution/test_arrays.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,10 @@ def test_array_literal(client, arr, create_arr_expr):


def test_array_length(t):
expr = t.projection(
[
t.array_of_float64.length().name('array_of_float64_length'),
t.array_of_int64.length().name('array_of_int64_length'),
t.array_of_strings.length().name('array_of_strings_length'),
]
expr = t.select(
t.array_of_float64.length().name('array_of_float64_length'),
t.array_of_int64.length().name('array_of_int64_length'),
t.array_of_strings.length().name('array_of_strings_length'),
)
result = expr.execute()
expected = pd.DataFrame(
Expand Down
38 changes: 17 additions & 21 deletions ibis/backends/postgres/tests/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -743,9 +743,7 @@ def test_simple_window(alltypes, func, df):
t = alltypes
f = getattr(t.double_col, func)
df_f = getattr(df.double_col, func)
result = (
t.projection([(t.double_col - f()).name('double_col')]).execute().double_col
)
result = t.select((t.double_col - f()).name('double_col')).execute().double_col
expected = df.double_col - df_f()
tm.assert_series_equal(result, expected)

Expand All @@ -761,7 +759,7 @@ def test_rolling_window(alltypes, func, df):
window = ibis.window(order_by=t.timestamp_col, preceding=6, following=0)
f = getattr(t.double_col, func)
df_f = getattr(df.double_col.rolling(7, min_periods=0), func)
result = t.projection([f().over(window).name('double_col')]).execute().double_col
result = t.select(f().over(window).name('double_col')).execute().double_col
expected = df_f()
tm.assert_series_equal(result, expected)

Expand Down Expand Up @@ -797,7 +795,7 @@ def rolled(df):

f = getattr(t.double_col, func)
expr = f().over(window).name('double_col')
result = t.projection([expr]).execute().double_col
result = t.select(expr).execute().double_col
expected = df.groupby('string_col').apply(roller(func)).reset_index(drop=True)
tm.assert_series_equal(result, expected)

Expand All @@ -807,7 +805,7 @@ def test_cumulative_simple_window(alltypes, func, df):
t = alltypes
f = getattr(t.double_col, func)
col = t.double_col - f().over(ibis.cumulative_window())
expr = t.projection([col.name('double_col')])
expr = t.select(col.name('double_col'))
result = expr.execute().double_col
expected = df.double_col - getattr(df.double_col, 'cum%s' % func)()
tm.assert_series_equal(result, expected)
Expand All @@ -819,7 +817,7 @@ def test_cumulative_partitioned_window(alltypes, func, df):
df = df.sort_values('string_col').reset_index(drop=True)
window = ibis.cumulative_window(group_by=t.string_col)
f = getattr(t.double_col, func)
expr = t.projection([(t.double_col - f().over(window)).name('double_col')])
expr = t.select((t.double_col - f().over(window)).name('double_col'))
result = expr.execute().double_col
expected = df.groupby(df.string_col).double_col.transform(
lambda c: c - getattr(c, 'cum%s' % func)()
Expand All @@ -833,7 +831,7 @@ def test_cumulative_ordered_window(alltypes, func, df):
df = df.sort_values('timestamp_col').reset_index(drop=True)
window = ibis.cumulative_window(order_by=t.timestamp_col)
f = getattr(t.double_col, func)
expr = t.projection([(t.double_col - f().over(window)).name('double_col')])
expr = t.select((t.double_col - f().over(window)).name('double_col'))
result = expr.execute().double_col
expected = df.double_col - getattr(df.double_col, 'cum%s' % func)()
tm.assert_series_equal(result, expected)
Expand All @@ -845,7 +843,7 @@ def test_cumulative_partitioned_ordered_window(alltypes, func, df):
df = df.sort_values(['string_col', 'timestamp_col']).reset_index(drop=True)
window = ibis.cumulative_window(order_by=t.timestamp_col, group_by=t.string_col)
f = getattr(t.double_col, func)
expr = t.projection([(t.double_col - f().over(window)).name('double_col')])
expr = t.select((t.double_col - f().over(window)).name('double_col'))
result = expr.execute().double_col
method = operator.methodcaller(f'cum{func}')
expected = df.groupby(df.string_col).double_col.transform(lambda c: c - method(c))
Expand Down Expand Up @@ -931,12 +929,10 @@ def array_types(con):


def test_array_length(array_types):
expr = array_types.projection(
[
array_types.x.length().name('x_length'),
array_types.y.length().name('y_length'),
array_types.z.length().name('z_length'),
]
expr = array_types.select(
array_types.x.length().name('x_length'),
array_types.y.length().name('y_length'),
array_types.z.length().name('z_length'),
)
result = expr.execute()
expected = pd.DataFrame(
Expand Down Expand Up @@ -995,7 +991,7 @@ def test_array_index(array_types, index):
],
)
def test_array_repeat(array_types, n, mul):
expr = array_types.projection([mul(array_types.x, n).name('repeated')])
expr = array_types.select(mul(array_types.x, n).name('repeated'))
result = expr.execute()
expected = pd.DataFrame(
{'repeated': array_types.x.execute().map(lambda x, n=n: mul(x, n))}
Expand All @@ -1013,9 +1009,9 @@ def test_array_repeat(array_types, n, mul):
def test_array_concat(array_types, catop):
t = array_types
x, y = t.x.cast('array<string>').name('x'), t.y
expr = t.projection([catop(x, y).name('catted')])
expr = t.select(catop(x, y).name('catted'))
result = expr.execute()
tuples = t.projection([x, y]).execute().itertuples(index=False)
tuples = t.select(x, y).execute().itertuples(index=False)
expected = pd.DataFrame({'catted': [catop(i, j) for i, j in tuples]})
tm.assert_frame_equal(result, expected)

Expand Down Expand Up @@ -1159,7 +1155,7 @@ def test_ntile(con):
def test_not_and_negate_bool(con, opname, df):
op = getattr(operator, opname)
t = con.table('functional_alltypes').limit(10)
expr = t.projection([op(t.bool_col).name('bool_col')])
expr = t.select(op(t.bool_col).name('bool_col'))
result = expr.execute().bool_col
expected = op(df.head(10).bool_col)
tm.assert_series_equal(result, expected)
Expand All @@ -1180,15 +1176,15 @@ def test_not_and_negate_bool(con, opname, df):
)
def test_negate_non_boolean(con, field, df):
t = con.table('functional_alltypes').limit(10)
expr = t.projection([(-t[field]).name(field)])
expr = t.select((-t[field]).name(field))
result = expr.execute()[field]
expected = -df.head(10)[field]
tm.assert_series_equal(result, expected)


def test_negate_boolean(con, df):
t = con.table('functional_alltypes').limit(10)
expr = t.projection([(-t.bool_col).name('bool_col')])
expr = t.select((-t.bool_col).name('bool_col'))
result = expr.execute().bool_col
expected = -df.head(10).bool_col
tm.assert_series_equal(result, expected)
Expand Down
2 changes: 1 addition & 1 deletion ibis/backends/pyspark/tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ def test_array_concat_scalar(client, op):
def test_array_repeat(client, n, mul):
table = client.table('array_table')

expr = table.projection([mul(table.array_int, n).name('repeated')])
expr = table.select(mul(table.array_int, n).name('repeated'))
result = expr.execute()

df = table.compile().toPandas()
Expand Down
2 changes: 1 addition & 1 deletion ibis/backends/sqlite/tests/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -659,7 +659,7 @@ def test_truncate_from_connection(con, alltypes):

def test_not(alltypes):
t = alltypes.limit(10)
expr = t.projection([(~t.double_col.isnull()).name('double_col')])
expr = t.select([(~t.double_col.isnull()).name('double_col')])
result = expr.execute().double_col
expected = ~t.execute().double_col.isnull()
tm.assert_series_equal(result, expected)
Expand Down
2 changes: 1 addition & 1 deletion ibis/expr/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def get_result(self):
for other in self.tables[1:]:
table = table.cross_join(other)

return table.projection([subbed_expr])
return table.select(subbed_expr)

def _visit(self, expr):
assert isinstance(expr, ir.Expr), type(expr)
Expand Down
2 changes: 1 addition & 1 deletion ibis/expr/operations/relations.py
Original file line number Diff line number Diff line change
Expand Up @@ -659,7 +659,7 @@ def _dedup_join_columns(expr, suffixes: tuple[str, str]):
for column in right.columns
if column not in equal
]
return expr.projection(left_projections + right_projections)
return expr.select(left_projections + right_projections)


public(ExistsSubquery=ExistsSubquery, NotExistsSubquery=NotExistsSubquery)
2 changes: 1 addition & 1 deletion ibis/expr/types/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def __init__(

def __getitem__(self, args):
# Shortcut for projection with window functions
return self.projection(list(args))
return self.select(*args)

def __getattr__(self, attr):
if hasattr(self.table, attr):
Expand Down
2 changes: 1 addition & 1 deletion ibis/tests/expr/test_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ def test_filter_self_join():

metric = (left.total - right.total).name('diff')
what = [left.region, metric]
projected = joined.projection(what)
projected = joined.select(what)

proj_exprs = projected.op().selections

Expand Down
Loading

0 comments on commit 73df8df

Please sign in to comment.