Skip to content

Commit

Permalink
fix(impala): remove broken alias elision
Browse files Browse the repository at this point in the history
  • Loading branch information
cpcloud committed Jan 13, 2023
1 parent 6cd80a6 commit 32b120f
Show file tree
Hide file tree
Showing 264 changed files with 874 additions and 922 deletions.
33 changes: 18 additions & 15 deletions ibis/backends/base/sql/alchemy/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,24 +81,27 @@ def get_sqla_table(ctx, table):
return sa_table


def get_col(sa_table, op: ops.TableColumn):
"""Get a `Column`."""
def get_col(sa_table, op: ops.TableColumn) -> sa.sql.ColumnClause:
"""Extract a column from a table."""
cols = sa_table.exported_columns
colname = op.name

try:
return cols[colname]
except KeyError:
# cols is a sqlalchemy column collection which contains column
# names that are secretly prefixed by their containing table
#
# sqlalchemy doesn't let you select by the *un*prefixed column name
# despite the uniqueness of `colname`
#
# however, in ibis we have already deduplicated column names so we can
# refer to the name by position
colindex = op.table.schema._name_locs[colname]
return cols[colindex]
if (col := cols.get(colname)) is not None:
return col

# `cols` is a SQLAlchemy column collection that contains columns
# with names that are secretly prefixed by table that contains them
#
# for example, in `t0.join(t1).select(t0.a, t1.b)` t0.a will be named `t0_a`
# and t1.b will be named `t1_b`
#
# unfortunately SQLAlchemy doesn't let you select by the *un*prefixed
# column name despite the uniqueness of `colname`
#
# however, in ibis we have already deduplicated column names so we can
# refer to the name by position
colindex = op.table.schema._name_locs[colname]
return cols[colindex]


def _table_column(t, op):
Expand Down
14 changes: 3 additions & 11 deletions ibis/backends/base/sql/compiler/query_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,6 @@ def _format_table(self, op):

if isinstance(ref_op, ops.InMemoryTable):
result = self._format_in_memory_table(ref_op)
is_subquery = True
elif isinstance(ref_op, ops.PhysicalTable):
name = ref_op.name
# TODO(kszucs): add a mandatory `name` field to the base
Expand All @@ -102,7 +101,6 @@ def _format_table(self, op):
if name is None:
raise com.RelationError(f'Table did not have a name: {op!r}')
result = self._quote_identifier(name)
is_subquery = False
else:
# A subquery
if ctx.is_extracted(ref_op):
Expand All @@ -118,10 +116,8 @@ def _format_table(self, op):

subquery = ctx.get_compiled_expr(op)
result = f'(\n{util.indent(subquery, self.indent)}\n)'
is_subquery = True

if is_subquery or ctx.need_aliases(op):
result += f' {ctx.get_ref(op)}'
result += f' {ctx.get_ref(op)}'

return result

Expand Down Expand Up @@ -302,12 +298,8 @@ def format_select_set(self):
if isinstance(node, ops.Value):
expr_str = self._translate(node, named=True)
elif isinstance(node, ops.TableNode):
# A * selection, possibly prefixed
if context.need_aliases(node):
alias = context.get_ref(node)
expr_str = f'{alias}.*' if alias else '*'
else:
expr_str = '*'
alias = context.get_ref(node)
expr_str = f'{alias}.*' if alias else '*'
else:
raise TypeError(node)
formatted.append(expr_str)
Expand Down
16 changes: 0 additions & 16 deletions ibis/backends/base/sql/compiler/select_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,22 +215,6 @@ def _populate_context(self):
if self.table_set is not None:
self._make_table_aliases(self.table_set)

# XXX: This is a temporary solution to the table-aliasing / correlated
# subquery problem. Will need to revisit and come up with a cleaner
# design (also as one way to avoid pathological naming conflicts; for
# example, we could define a table alias before we know that it
# conflicts with the name of a table used in a subquery, join, or
# another part of the query structure)

# There may be correlated subqueries inside the filters, requiring that
# we use an explicit alias when outputting as SQL. For now, we're just
# going to see if any table nodes appearing in the where stack have
# been marked previously by the above code.
for expr in self.filters:
needs_alias = self._foreign_ref_check(self, expr)
if needs_alias:
self.context.set_always_alias()

# TODO(kszucs): should be rewritten using lin.traverse()
def _make_table_aliases(self, node):
ctx = self.context
Expand Down
17 changes: 10 additions & 7 deletions ibis/backends/base/sql/compiler/translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def __init__(self, compiler, indent=2, parent=None, params=None):
self.subquery_memo = {}
self.indent = indent
self.parent = parent
self.always_alias = False
self.always_alias = True
self.query = None
self.params = params if params is not None else {}

Expand Down Expand Up @@ -92,9 +92,6 @@ def make_alias(self, node):
alias = f't{i:d}'
self.set_ref(node, alias)

def need_aliases(self, expr=None):
return self.always_alias or len(self.table_refs) > 1

def _contexts(
self,
*,
Expand All @@ -114,14 +111,20 @@ def has_ref(self, node, parent_contexts=False):
def set_ref(self, node, alias):
self.table_refs[node] = alias

def get_ref(self, node):
def get_ref(self, node, search_parents=False):
"""Return the alias used to refer to an expression."""
assert isinstance(node, ops.Node)
assert isinstance(node, ops.Node), type(node)

if self.is_extracted(node):
return self.top_context.table_refs.get(node)

return self.table_refs.get(node)
if (ref := self.table_refs.get(node)) is not None:
return ref

if search_parents and (parent := self.parent) is not None:
return parent.get_ref(node, search_parents=search_parents)

return None

def is_extracted(self, node):
return node in self.top_context.extracted_subexprs
Expand Down
17 changes: 5 additions & 12 deletions ibis/backends/base/sql/registry/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,10 +155,9 @@ def table_column(translator, op):
proj_expr = op.table.to_expr().projection([op.name]).to_array().op()
return table_array_view(translator, proj_expr)

if ctx.need_aliases():
alias = ctx.get_ref(op.table)
if alias is not None:
quoted_name = f'{alias}.{quoted_name}'
alias = ctx.get_ref(op.table, search_parents=True)
if alias is not None:
quoted_name = f"{alias}.{quoted_name}"

return quoted_name

Expand All @@ -175,14 +174,8 @@ def exists_subquery(translator, op):

subquery = ctx.get_compiled_expr(node)

if isinstance(op, ops.ExistsSubquery):
key = 'EXISTS'
elif isinstance(op, ops.NotExistsSubquery):
key = 'NOT EXISTS'
else:
raise NotImplementedError

return f'{key} (\n{util.indent(subquery, ctx.indent)}\n)'
prefix = "NOT " * isinstance(op, ops.NotExistsSubquery)
return f'{prefix}EXISTS (\n{util.indent(subquery, ctx.indent)}\n)'


# XXX this is not added to operation_registry, but looks like impala is
Expand Down
18 changes: 9 additions & 9 deletions ibis/backends/bigquery/tests/system/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,13 +114,13 @@ def test_different_partition_col_name(monkeypatch, client):

def test_subquery_scalar_params(alltypes, project_id, dataset_id):
expected = f"""\
SELECT count\\(`foo`\\) AS `count`
SELECT count\\(t0\\.`foo`\\) AS `count`
FROM \\(
SELECT `string_col`, sum\\(`float_col`\\) AS `foo`
SELECT t1\\.`string_col`, sum\\(t1\\.`float_col`\\) AS `foo`
FROM \\(
SELECT `float_col`, `timestamp_col`, `int_col`, `string_col`
FROM `{project_id}\\.{dataset_id}\\.functional_alltypes`
WHERE `timestamp_col` < @param_\\d+
SELECT t2\\.`float_col`, t2\\.`timestamp_col`, t2\\.`int_col`, t2\\.`string_col`
FROM `{project_id}\\.{dataset_id}\\.functional_alltypes` t2
WHERE t2\\.`timestamp_col` < @param_\\d+
\\) t1
GROUP BY 1
\\) t0"""
Expand Down Expand Up @@ -225,11 +225,11 @@ def test_cross_project_query(public):
expr = table[table.tags.contains("ibis")][["title", "tags"]]
result = expr.compile()
expected = """\
SELECT `title`, `tags`
SELECT t0.`title`, t0.`tags`
FROM (
SELECT *
FROM `bigquery-public-data.stackoverflow.posts_questions`
WHERE STRPOS(`tags`, 'ibis') - 1 >= 0
SELECT t1.*
FROM `bigquery-public-data.stackoverflow.posts_questions` t1
WHERE STRPOS(t1.`tags`, 'ibis') - 1 >= 0
) t0"""
assert result == expected
n = 5
Expand Down
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT APPROX_QUANTILES(if(`month` > 0, `double_col`, NULL), 2)[OFFSET(1)] AS `tmp`
FROM functional_alltypes
SELECT APPROX_QUANTILES(if(t0.`month` > 0, t0.`double_col`, NULL), 2)[OFFSET(1)] AS `tmp`
FROM functional_alltypes t0
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT APPROX_COUNT_DISTINCT(if(`month` > 0, `double_col`, NULL)) AS `tmp`
FROM functional_alltypes
SELECT APPROX_COUNT_DISTINCT(if(t0.`month` > 0, t0.`double_col`, NULL)) AS `tmp`
FROM functional_alltypes t0
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT APPROX_QUANTILES(`double_col`, 2)[OFFSET(1)] AS `tmp`
FROM functional_alltypes
SELECT APPROX_QUANTILES(t0.`double_col`, 2)[OFFSET(1)] AS `tmp`
FROM functional_alltypes t0
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT APPROX_COUNT_DISTINCT(`double_col`) AS `tmp`
FROM functional_alltypes
SELECT APPROX_COUNT_DISTINCT(t0.`double_col`) AS `tmp`
FROM functional_alltypes t0
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT CAST(`value` AS BYTES) AS `tmp`
FROM t
SELECT CAST(t0.`value` AS BYTES) AS `tmp`
FROM t t0
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT BIT_AND(if(`bigint_col` > 0, `int_col`, NULL)) AS `tmp`
FROM functional_alltypes
SELECT BIT_AND(if(t0.`bigint_col` > 0, t0.`int_col`, NULL)) AS `tmp`
FROM functional_alltypes t0
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT BIT_OR(if(`bigint_col` > 0, `int_col`, NULL)) AS `tmp`
FROM functional_alltypes
SELECT BIT_OR(if(t0.`bigint_col` > 0, t0.`int_col`, NULL)) AS `tmp`
FROM functional_alltypes t0
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT BIT_XOR(if(`bigint_col` > 0, `int_col`, NULL)) AS `tmp`
FROM functional_alltypes
SELECT BIT_XOR(if(t0.`bigint_col` > 0, t0.`int_col`, NULL)) AS `tmp`
FROM functional_alltypes t0
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT BIT_AND(`int_col`) AS `tmp`
FROM functional_alltypes
SELECT BIT_AND(t0.`int_col`) AS `tmp`
FROM functional_alltypes t0
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT BIT_OR(`int_col`) AS `tmp`
FROM functional_alltypes
SELECT BIT_OR(t0.`int_col`) AS `tmp`
FROM functional_alltypes t0
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT BIT_XOR(`int_col`) AS `tmp`
FROM functional_alltypes
SELECT BIT_XOR(t0.`int_col`) AS `tmp`
FROM functional_alltypes t0
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT avg(CAST(`bool_col` AS INT64)) AS `tmp`
FROM functional_alltypes
SELECT avg(CAST(t0.`bool_col` AS INT64)) AS `tmp`
FROM functional_alltypes t0
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT sum(CAST(`bool_col` AS INT64)) AS `tmp`
FROM functional_alltypes
SELECT sum(CAST(t0.`bool_col` AS INT64)) AS `tmp`
FROM functional_alltypes t0
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT sum(if((`month` > 6) AND (`month` < 10), CAST(`bool_col` AS INT64), NULL)) AS `tmp`
FROM functional_alltypes
SELECT sum(if((t0.`month` > 6) AND (t0.`month` < 10), CAST(t0.`bool_col` AS INT64), NULL)) AS `tmp`
FROM functional_alltypes t0
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT avg(if(`month` > 6, CAST(`bool_col` AS INT64), NULL)) AS `tmp`
FROM functional_alltypes
SELECT avg(if(t0.`month` > 6, CAST(t0.`bool_col` AS INT64), NULL)) AS `tmp`
FROM functional_alltypes t0
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
SELECT
CASE
WHEN (0 <= `value`) AND (`value` < 1) THEN 0
WHEN (1 <= `value`) AND (`value` <= 3) THEN 1
WHEN (0 <= t0.`value`) AND (t0.`value` < 1) THEN 0
WHEN (1 <= t0.`value`) AND (t0.`value` <= 3) THEN 1
ELSE CAST(NULL AS INT64)
END AS `tmp`
FROM t
FROM t t0
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT sum(`foo`) AS `tmp`
FROM t0
SELECT sum(t0.`foo`) AS `tmp`
FROM t0 t0
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT COVAR_POP(`double_col`, `double_col`) AS `tmp`
FROM functional_alltypes
SELECT COVAR_POP(t0.`double_col`, t0.`double_col`) AS `tmp`
FROM functional_alltypes t0
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT COVAR_SAMP(`double_col`, `double_col`) AS `tmp`
FROM functional_alltypes
SELECT COVAR_SAMP(t0.`double_col`, t0.`double_col`) AS `tmp`
FROM functional_alltypes t0
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT CAST(FLOOR(IEEE_DIVIDE(`double_col`, 0)) AS INT64) AS `tmp`
FROM functional_alltypes
SELECT CAST(FLOOR(IEEE_DIVIDE(t0.`double_col`, 0)) AS INT64) AS `tmp`
FROM functional_alltypes t0
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT IEEE_DIVIDE(`double_col`, 0) AS `tmp`
FROM functional_alltypes
SELECT IEEE_DIVIDE(t0.`double_col`, 0) AS `tmp`
FROM functional_alltypes t0
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT DATE(`ts`) AS `tmp`
FROM t
SELECT DATE(t0.`ts`) AS `tmp`
FROM t t0
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT TIME(`ts`) AS `tmp`
FROM t
SELECT TIME(t0.`ts`) AS `tmp`
FROM t t0
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT ST_AZIMUTH(`p0`, `p1`) AS `tmp`
FROM t
SELECT ST_AZIMUTH(t0.`p0`, t0.`p1`) AS `tmp`
FROM t t0
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT ST_CONTAINS(`geog0`, `geog1`) AS `tmp`
FROM t
SELECT ST_CONTAINS(t0.`geog0`, t0.`geog1`) AS `tmp`
FROM t t0
Loading

0 comments on commit 32b120f

Please sign in to comment.