From ea977948d711b0d8446fe4d9ca90531df79123c4 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Mon, 26 Aug 2024 09:57:19 -0400 Subject: [PATCH] fix(bigquery): disallow column names longer than 300 characters (#9916) Closes #8931. --- .../bigquery/tests/unit/test_compiler.py | 9 +++++++++ .../sql/compilers/bigquery/__init__.py | 20 ++++++++++++++++++- 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/ibis/backends/bigquery/tests/unit/test_compiler.py b/ibis/backends/bigquery/tests/unit/test_compiler.py index eb28db2b840f..e058cd214c86 100644 --- a/ibis/backends/bigquery/tests/unit/test_compiler.py +++ b/ibis/backends/bigquery/tests/unit/test_compiler.py @@ -693,3 +693,12 @@ def test_approx_quantiles(alltypes, quantiles, snapshot): query = alltypes.double_col.approx_quantile(quantiles).name("qs") result = ibis.to_sql(query, dialect="bigquery") snapshot.assert_match(result, "out.sql") + + +def test_unreasonably_long_name(): + expr = ibis.literal("hello, world!").name("a" * 301) + with pytest.raises( + com.IbisError, + match="BigQuery does not allow column names longer than 300 characters", + ): + ibis.to_sql(expr, dialect="bigquery") diff --git a/ibis/backends/sql/compilers/bigquery/__init__.py b/ibis/backends/sql/compilers/bigquery/__init__.py index 97ccdc1d6511..e803734d1f59 100644 --- a/ibis/backends/sql/compilers/bigquery/__init__.py +++ b/ibis/backends/sql/compilers/bigquery/__init__.py @@ -924,7 +924,25 @@ def visit_HashBytes(self, op, *, arg, how): @staticmethod def _gen_valid_name(name: str) -> str: - return "_".join(map(str.strip, _NAME_REGEX.findall(name))) or "tmp" + candidate = "_".join(map(str.strip, _NAME_REGEX.findall(name))) or "tmp" + # column names cannot be longer than 300 characters + # + # https://cloud.google.com/bigquery/docs/schemas#column_names + # + # it's easy to rename columns, so raise an exception telling the user + # to do so + # + # we could potentially relax this and support arbitrary-length columns + # by compressing the information using hashing, but there's no reason + # to solve that problem until someone encounters this error and cannot + # rename their columns + limit = 300 + if len(candidate) > limit: + raise com.IbisError( + f"BigQuery does not allow column names longer than {limit:d} characters. " + "Please rename your columns to have fewer characters." + ) + return candidate def visit_CountStar(self, op, *, arg, where): if where is not None: