Skip to content

Commit

Permalink
fix(bigquery): quote all parts of table names
Browse files Browse the repository at this point in the history
The BigQuery identifier quoting semantics are bonkers
https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#identifiers

`my-table` is OK unquoted, but `mydataset.my-table` is not.

`mytable-287` is OK unquoted, but `mytable-287a` is not.

Just quote everything.
  • Loading branch information
gforsyth committed May 7, 2024
1 parent 4f93a91 commit 8e1902f
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 2 deletions.
30 changes: 29 additions & 1 deletion ibis/backends/bigquery/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,27 @@ def _remove_null_ordering_from_unsupported_window(
return node


def _force_quote_table(table: sge.Table) -> sge.Table:
    """Force quote all the parts of a BigQuery table path.

    The BigQuery identifier quoting semantics are bonkers
    https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#identifiers

    ``my-table`` is OK, but not ``mydataset.my-table``

    ``mytable-287`` is OK, but not ``mytable-287a``

    Just quote everything.

    Parameters
    ----------
    table
        Table expression whose ``this``, ``db`` and ``catalog`` parts should
        be quoted. The expression is modified in place.

    Returns
    -------
    sge.Table
        The same ``table`` object, with every part that is present quoted.
        Parts that are missing or ``None`` are left untouched.
    """
    for key in ("this", "db", "catalog"):
        # ``.get`` rather than indexing: a Table constructed without an
        # explicit ``db``/``catalog`` has no such key in ``args`` at all.
        val = table.args.get(key)
        if val is None:
            continue

        if isinstance(val, sg.exp.Identifier):
            # Already an identifier: flip the flag in place when needed.
            if not val.quoted:
                val.set("quoted", True)
        else:
            # Anything else (e.g. a plain string) is converted to a quoted
            # identifier; ``set`` also wires up the new node's parent.
            table.set(key, sg.to_identifier(val, quoted=True))
    return table


class Backend(SQLBackend, CanCreateDatabase, CanCreateSchema):
name = "bigquery"
compiler = BigQueryCompiler()
Expand Down Expand Up @@ -1025,14 +1046,21 @@ def create_table(
try:
table = sg.parse_one(name, into=sge.Table, read="bigquery")
except sg.ParseError:
table = sg.table(name, db=dataset, catalog=project_id)
table = sg.table(
name,
db=dataset,
catalog=project_id,
quoted=self.compiler.quoted,
)
else:
if table.args["db"] is None:
table.args["db"] = dataset

if table.args["catalog"] is None:
table.args["catalog"] = project_id

table = _force_quote_table(table)

column_defs = [
sge.ColumnDef(
this=sg.to_identifier(name, quoted=self.compiler.quoted),
Expand Down
14 changes: 14 additions & 0 deletions ibis/backends/bigquery/tests/system/test_connect.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,3 +238,17 @@ def test_client_with_regional_endpoints(project_id, credentials, dataset_id):
df = alltypes.execute()
assert df.empty
assert not len(alltypes.to_pyarrow())


def test_create_table_from_memtable_needs_quotes(project_id, credentials):
    """Create and then drop a table whose name contains a dash.

    The test passes as long as neither the create nor the drop call raises.
    """
    table_name = "region-table"

    conn = ibis.bigquery.connect(
        project_id=project_id,
        dataset_id=f"{project_id}.testing",
        credentials=credentials,
    )

    schema = ibis.schema({"its_always": "str", "quoting": "int"})
    conn.create_table(table_name, schema=schema)
    conn.drop_table(table_name)
23 changes: 22 additions & 1 deletion ibis/backends/bigquery/tests/unit/test_client.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from __future__ import annotations

import pytest
import sqlglot as sg

from ibis.backends.bigquery import client
from ibis.backends.bigquery import _force_quote_table, client


@pytest.mark.parametrize(
Expand Down Expand Up @@ -30,3 +31,23 @@ def test_parse_project_and_dataset_raises_error():
expected_message = "data-project.my_dataset.table is not a BigQuery dataset"
with pytest.raises(ValueError, match=expected_message):
client.parse_project_and_dataset("my-project", "data-project.my_dataset.table")


@pytest.mark.parametrize(
    "bq_path_str",
    [
        "ibis-gbq.ibis_gbq_testing.argle",
        "ibis-gbq.ibis_gbq_testing.28argle",
        "mytable-287a",
        "myproject.mydataset.my-table",
        "my-dataset.mytable",
        "a-7b0a.dev_test_dataset.test_ibis5",
    ],
)
def test_force_quoting(bq_path_str):
    """Every part present in a parsed BigQuery path ends up quoted."""
    parsed = sg.parse_one(bq_path_str, into=sg.exp.Table, read="bigquery")
    quoted_table = _force_quote_table(parsed)

    # Collect the parts that exist for this path; shorter paths leave
    # ``db`` and/or ``catalog`` as None.
    parts = [quoted_table.args[part] for part in ("this", "db", "catalog")]
    for identifier in parts:
        if identifier is None:
            continue
        assert identifier.quoted

0 comments on commit 8e1902f

Please sign in to comment.