Skip to content

Commit

Permalink
refactor(api): remove explicit use of `.projection` in favor of the shorter `.select`
Browse files Browse the repository at this point in the history
  • Loading branch information
cpcloud authored and kszucs committed Apr 6, 2023
1 parent 41b2949 commit 73df8df
Show file tree
Hide file tree
Showing 25 changed files with 82 additions and 91 deletions.
2 changes: 1 addition & 1 deletion docs/ibis-for-sql-programmers.ipynb

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion ibis/backends/base/sql/alchemy/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ def _exists_subquery(t, op):
filtered = (
op.foreign_table.to_expr()
.filter([pred.to_expr() for pred in op.predicates])
.projection([ir.literal(1).name("")])
.select(ir.literal(1).name(""))
)

sub_ctx = ctx.subcontext()
Expand Down
4 changes: 2 additions & 2 deletions ibis/backends/base/sql/registry/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ def table_column(translator, op):
# context, we should format as a subquery
if translator.permit_subquery and ctx.is_foreign_expr(op.table):
# TODO(kszucs): avoid the expression roundtrip
proj_expr = op.table.to_expr().projection([op.name]).to_array().op()
proj_expr = op.table.to_expr().select([op.name]).to_array().op()
return table_array_view(translator, proj_expr)

alias = ctx.get_ref(op.table, search_parents=True)
Expand All @@ -168,7 +168,7 @@ def exists_subquery(translator, op):
filtered = op.foreign_table.to_expr().filter(
[pred.to_expr() for pred in op.predicates]
)
node = filtered.projection([dummy]).op()
node = filtered.select(dummy).op()

subquery = ctx.get_compiled_expr(node)

Expand Down
2 changes: 1 addition & 1 deletion ibis/backends/clickhouse/tests/test_operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ def test_negate(con, alltypes, translate, column, operator):
)
def test_negate_non_boolean(alltypes, field, df):
t = alltypes.limit(10)
expr = t.projection([(-t[field]).name(field)])
expr = t.select((-t[field]).name(field))
result = expr.execute()[field]
expected = -df.head(10)[field]
tm.assert_series_equal(result, expected)
Expand Down
2 changes: 1 addition & 1 deletion ibis/backends/clickhouse/tests/test_select.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,7 @@ def test_filter_predicates(diamonds):

expr = diamonds
for pred in predicates:
expr = expr[pred(expr)].projection([expr])
expr = expr[pred(expr)].select(expr)

expr.execute()

Expand Down
12 changes: 5 additions & 7 deletions ibis/backends/dask/tests/execution/test_arrays.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,10 @@


def test_array_length(t):
expr = t.projection(
[
t.array_of_float64.length().name('array_of_float64_length'),
t.array_of_int64.length().name('array_of_int64_length'),
t.array_of_strings.length().name('array_of_strings_length'),
]
expr = t.select(
t.array_of_float64.length().name('array_of_float64_length'),
t.array_of_int64.length().name('array_of_int64_length'),
t.array_of_strings.length().name('array_of_strings_length'),
)
result = expr.compile()
expected = dd.from_pandas(
Expand Down Expand Up @@ -173,7 +171,7 @@ def test_array_index_scalar(client, index):
@pytest.mark.parametrize('n', [1, 3, 4, 7, -2]) # negative returns empty list
@pytest.mark.parametrize('mul', [lambda x, n: x * n, lambda x, n: n * x])
def test_array_repeat(t, df, n, mul):
expr = t.projection([mul(t.array_of_strings, n).name('repeated')])
expr = t.select(repeated=mul(t.array_of_strings, n))
result = expr.execute()
expected = pd.DataFrame({'repeated': df.array_of_strings * n})
tm.assert_frame_equal(result, expected)
Expand Down
2 changes: 1 addition & 1 deletion ibis/backends/impala/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,7 @@ def insert(
if partition is not None:
partition_schema = self.partition_schema()
partition_schema_names = frozenset(partition_schema.names)
expr = expr.projection(
expr = expr.select(
[
column
for column in expr.columns
Expand Down
4 changes: 2 additions & 2 deletions ibis/backends/impala/tests/test_exprs.py
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,7 @@ def test_filter_predicates(con):

expr = t
for pred in predicates:
expr = expr[pred(expr)].projection([expr])
expr = expr[pred(expr)].select(expr)

expr.execute()

Expand Down Expand Up @@ -672,7 +672,7 @@ def test_identical_to(con, left, right, expected):

def test_not(alltypes):
t = alltypes.limit(10)
expr = t.projection([(~t.double_col.isnull()).name('double_col')])
expr = t.select(double_col=~t.double_col.isnull())
result = expr.execute().double_col
expected = ~t.execute().double_col.isnull()
tm.assert_series_equal(result, expected)
Expand Down
10 changes: 5 additions & 5 deletions ibis/backends/impala/tests/test_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def limit_cte_extract(con):
alltypes = con.table('functional_alltypes')
t = alltypes.limit(100)
t2 = t.view()
return t.join(t2).projection(t)
return t.join(t2).select(t)


@pytest.mark.parametrize(
Expand All @@ -52,7 +52,7 @@ def test_nested_join_base(snapshot):
t = ibis.table(dict(uuid='string', ts='timestamp'), name='t')
counts = t.group_by('uuid').size()
max_counts = counts.group_by('uuid').aggregate(max_count=lambda x: x['count'].max())
result = max_counts.left_join(counts, 'uuid').projection([counts])
result = max_counts.left_join(counts, 'uuid').select(counts)
compiled_result = ImpalaCompiler.to_sql(result)
snapshot.assert_match(compiled_result, "out.sql")

Expand All @@ -68,10 +68,10 @@ def test_nested_joins_single_cte(snapshot):

main_kw = max_counts.left_join(
counts, ['uuid', max_counts.max_count == counts['count']]
).projection([counts])
).select(counts)

result = main_kw.left_join(last_visit, 'uuid').projection(
[main_kw, last_visit.last_visit]
result = main_kw.left_join(last_visit, 'uuid').select(
main_kw, last_visit.last_visit
)
compiled_result = ImpalaCompiler.to_sql(result)
snapshot.assert_match(compiled_result, "out.sql")
Expand Down
14 changes: 7 additions & 7 deletions ibis/backends/impala/tests/test_window.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def test_window_frame_specs(alltypes, window, snapshot):
t = alltypes

w2 = window.order_by(t.f)
expr = t.projection([t.d.sum().over(w2).name('foo')])
expr = t.select(foo=t.d.sum().over(w2))
assert_sql_equal(expr, snapshot)


Expand All @@ -83,8 +83,8 @@ def test_cumulative_functions(alltypes, name, snapshot):
expr = cumfunc().over(w).name("foo")
expected = func().over(ibis.cumulative_window(order_by=t.d)).name("foo")

expr1 = t.projection(expr)
expr2 = t.projection(expected)
expr1 = t.select(expr)
expr2 = t.select(expected)

assert_sql_equal(expr1, snapshot, "out1.sql")
assert_sql_equal(expr2, snapshot, "out2.sql")
Expand All @@ -95,7 +95,7 @@ def test_nested_analytic_function(alltypes, snapshot):

w = window(order_by=t.f)
expr = (t.f - t.f.lag()).lag().over(w).name('foo')
result = t.projection([expr])
result = t.select(expr)
assert_sql_equal(result, snapshot)


Expand All @@ -112,7 +112,7 @@ def test_multiple_windows(alltypes, snapshot):
w = window(group_by=t.g)

expr = t.f.sum().over(w) - t.f.sum()
proj = t.projection([t.g, expr.name('result')])
proj = t.select(t.g, result=expr)

assert_sql_equal(proj, snapshot)

Expand Down Expand Up @@ -154,7 +154,7 @@ def test_unsupported_aggregate_functions(alltypes, column, op):
t = alltypes
w = ibis.window(order_by=t.d)
expr = getattr(t[column], op)()
proj = t.projection([expr.over(w).name('foo')])
proj = t.select(foo=expr.over(w))
with pytest.raises(com.TranslationError):
ImpalaCompiler.to_sql(proj)

Expand All @@ -172,5 +172,5 @@ def test_propagate_nested_windows(alltypes, snapshot):
ex_expr = (t.f - t.f.lag().over(w)).lag().over(w)
assert_equal(result, ex_expr)

expr = t.projection(col.over(w).name('foo'))
expr = t.select(col.over(w).name('foo'))
assert_sql_equal(expr, snapshot)
10 changes: 4 additions & 6 deletions ibis/backends/pandas/tests/execution/test_arrays.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,10 @@ def test_array_literal(client, arr, create_arr_expr):


def test_array_length(t):
expr = t.projection(
[
t.array_of_float64.length().name('array_of_float64_length'),
t.array_of_int64.length().name('array_of_int64_length'),
t.array_of_strings.length().name('array_of_strings_length'),
]
expr = t.select(
t.array_of_float64.length().name('array_of_float64_length'),
t.array_of_int64.length().name('array_of_int64_length'),
t.array_of_strings.length().name('array_of_strings_length'),
)
result = expr.execute()
expected = pd.DataFrame(
Expand Down
38 changes: 17 additions & 21 deletions ibis/backends/postgres/tests/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -743,9 +743,7 @@ def test_simple_window(alltypes, func, df):
t = alltypes
f = getattr(t.double_col, func)
df_f = getattr(df.double_col, func)
result = (
t.projection([(t.double_col - f()).name('double_col')]).execute().double_col
)
result = t.select((t.double_col - f()).name('double_col')).execute().double_col
expected = df.double_col - df_f()
tm.assert_series_equal(result, expected)

Expand All @@ -761,7 +759,7 @@ def test_rolling_window(alltypes, func, df):
window = ibis.window(order_by=t.timestamp_col, preceding=6, following=0)
f = getattr(t.double_col, func)
df_f = getattr(df.double_col.rolling(7, min_periods=0), func)
result = t.projection([f().over(window).name('double_col')]).execute().double_col
result = t.select(f().over(window).name('double_col')).execute().double_col
expected = df_f()
tm.assert_series_equal(result, expected)

Expand Down Expand Up @@ -797,7 +795,7 @@ def rolled(df):

f = getattr(t.double_col, func)
expr = f().over(window).name('double_col')
result = t.projection([expr]).execute().double_col
result = t.select(expr).execute().double_col
expected = df.groupby('string_col').apply(roller(func)).reset_index(drop=True)
tm.assert_series_equal(result, expected)

Expand All @@ -807,7 +805,7 @@ def test_cumulative_simple_window(alltypes, func, df):
t = alltypes
f = getattr(t.double_col, func)
col = t.double_col - f().over(ibis.cumulative_window())
expr = t.projection([col.name('double_col')])
expr = t.select(col.name('double_col'))
result = expr.execute().double_col
expected = df.double_col - getattr(df.double_col, 'cum%s' % func)()
tm.assert_series_equal(result, expected)
Expand All @@ -819,7 +817,7 @@ def test_cumulative_partitioned_window(alltypes, func, df):
df = df.sort_values('string_col').reset_index(drop=True)
window = ibis.cumulative_window(group_by=t.string_col)
f = getattr(t.double_col, func)
expr = t.projection([(t.double_col - f().over(window)).name('double_col')])
expr = t.select((t.double_col - f().over(window)).name('double_col'))
result = expr.execute().double_col
expected = df.groupby(df.string_col).double_col.transform(
lambda c: c - getattr(c, 'cum%s' % func)()
Expand All @@ -833,7 +831,7 @@ def test_cumulative_ordered_window(alltypes, func, df):
df = df.sort_values('timestamp_col').reset_index(drop=True)
window = ibis.cumulative_window(order_by=t.timestamp_col)
f = getattr(t.double_col, func)
expr = t.projection([(t.double_col - f().over(window)).name('double_col')])
expr = t.select((t.double_col - f().over(window)).name('double_col'))
result = expr.execute().double_col
expected = df.double_col - getattr(df.double_col, 'cum%s' % func)()
tm.assert_series_equal(result, expected)
Expand All @@ -845,7 +843,7 @@ def test_cumulative_partitioned_ordered_window(alltypes, func, df):
df = df.sort_values(['string_col', 'timestamp_col']).reset_index(drop=True)
window = ibis.cumulative_window(order_by=t.timestamp_col, group_by=t.string_col)
f = getattr(t.double_col, func)
expr = t.projection([(t.double_col - f().over(window)).name('double_col')])
expr = t.select((t.double_col - f().over(window)).name('double_col'))
result = expr.execute().double_col
method = operator.methodcaller(f'cum{func}')
expected = df.groupby(df.string_col).double_col.transform(lambda c: c - method(c))
Expand Down Expand Up @@ -931,12 +929,10 @@ def array_types(con):


def test_array_length(array_types):
expr = array_types.projection(
[
array_types.x.length().name('x_length'),
array_types.y.length().name('y_length'),
array_types.z.length().name('z_length'),
]
expr = array_types.select(
array_types.x.length().name('x_length'),
array_types.y.length().name('y_length'),
array_types.z.length().name('z_length'),
)
result = expr.execute()
expected = pd.DataFrame(
Expand Down Expand Up @@ -995,7 +991,7 @@ def test_array_index(array_types, index):
],
)
def test_array_repeat(array_types, n, mul):
expr = array_types.projection([mul(array_types.x, n).name('repeated')])
expr = array_types.select(mul(array_types.x, n).name('repeated'))
result = expr.execute()
expected = pd.DataFrame(
{'repeated': array_types.x.execute().map(lambda x, n=n: mul(x, n))}
Expand All @@ -1013,9 +1009,9 @@ def test_array_repeat(array_types, n, mul):
def test_array_concat(array_types, catop):
t = array_types
x, y = t.x.cast('array<string>').name('x'), t.y
expr = t.projection([catop(x, y).name('catted')])
expr = t.select(catop(x, y).name('catted'))
result = expr.execute()
tuples = t.projection([x, y]).execute().itertuples(index=False)
tuples = t.select(x, y).execute().itertuples(index=False)
expected = pd.DataFrame({'catted': [catop(i, j) for i, j in tuples]})
tm.assert_frame_equal(result, expected)

Expand Down Expand Up @@ -1159,7 +1155,7 @@ def test_ntile(con):
def test_not_and_negate_bool(con, opname, df):
op = getattr(operator, opname)
t = con.table('functional_alltypes').limit(10)
expr = t.projection([op(t.bool_col).name('bool_col')])
expr = t.select(op(t.bool_col).name('bool_col'))
result = expr.execute().bool_col
expected = op(df.head(10).bool_col)
tm.assert_series_equal(result, expected)
Expand All @@ -1180,15 +1176,15 @@ def test_not_and_negate_bool(con, opname, df):
)
def test_negate_non_boolean(con, field, df):
t = con.table('functional_alltypes').limit(10)
expr = t.projection([(-t[field]).name(field)])
expr = t.select((-t[field]).name(field))
result = expr.execute()[field]
expected = -df.head(10)[field]
tm.assert_series_equal(result, expected)


def test_negate_boolean(con, df):
t = con.table('functional_alltypes').limit(10)
expr = t.projection([(-t.bool_col).name('bool_col')])
expr = t.select((-t.bool_col).name('bool_col'))
result = expr.execute().bool_col
expected = -df.head(10).bool_col
tm.assert_series_equal(result, expected)
Expand Down
2 changes: 1 addition & 1 deletion ibis/backends/pyspark/tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ def test_array_concat_scalar(client, op):
def test_array_repeat(client, n, mul):
table = client.table('array_table')

expr = table.projection([mul(table.array_int, n).name('repeated')])
expr = table.select(mul(table.array_int, n).name('repeated'))
result = expr.execute()

df = table.compile().toPandas()
Expand Down
2 changes: 1 addition & 1 deletion ibis/backends/sqlite/tests/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -659,7 +659,7 @@ def test_truncate_from_connection(con, alltypes):

def test_not(alltypes):
t = alltypes.limit(10)
expr = t.projection([(~t.double_col.isnull()).name('double_col')])
expr = t.select([(~t.double_col.isnull()).name('double_col')])
result = expr.execute().double_col
expected = ~t.execute().double_col.isnull()
tm.assert_series_equal(result, expected)
Expand Down
2 changes: 1 addition & 1 deletion ibis/expr/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def get_result(self):
for other in self.tables[1:]:
table = table.cross_join(other)

return table.projection([subbed_expr])
return table.select(subbed_expr)

def _visit(self, expr):
assert isinstance(expr, ir.Expr), type(expr)
Expand Down
2 changes: 1 addition & 1 deletion ibis/expr/operations/relations.py
Original file line number Diff line number Diff line change
Expand Up @@ -659,7 +659,7 @@ def _dedup_join_columns(expr, suffixes: tuple[str, str]):
for column in right.columns
if column not in equal
]
return expr.projection(left_projections + right_projections)
return expr.select(left_projections + right_projections)


public(ExistsSubquery=ExistsSubquery, NotExistsSubquery=NotExistsSubquery)
2 changes: 1 addition & 1 deletion ibis/expr/types/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def __init__(

def __getitem__(self, args):
# Shortcut for projection with window functions
return self.projection(list(args))
return self.select(*args)

def __getattr__(self, attr):
if hasattr(self.table, attr):
Expand Down
2 changes: 1 addition & 1 deletion ibis/tests/expr/test_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ def test_filter_self_join():

metric = (left.total - right.total).name('diff')
what = [left.region, metric]
projected = joined.projection(what)
projected = joined.select(what)

proj_exprs = projected.op().selections

Expand Down
Loading

0 comments on commit 73df8df

Please sign in to comment.