From b319c674317196f599524ddf374fb1596526e7e2 Mon Sep 17 00:00:00 2001 From: Sung Won Chung Date: Thu, 5 Aug 2021 14:29:26 -0500 Subject: [PATCH 01/13] add blueprints to resolve issue --- .../bigquery/dbt/adapters/bigquery/connections.py | 5 +++++ test/unit/test_bigquery_adapter.py | 13 +++++++++++++ 2 files changed, 18 insertions(+) diff --git a/plugins/bigquery/dbt/adapters/bigquery/connections.py b/plugins/bigquery/dbt/adapters/bigquery/connections.py index 137b02d4f48..2dcecb37f75 100644 --- a/plugins/bigquery/dbt/adapters/bigquery/connections.py +++ b/plugins/bigquery/dbt/adapters/bigquery/connections.py @@ -595,9 +595,14 @@ def _is_retryable(error): _SANITIZE_LABEL_PATTERN = re.compile(r"[^a-z0-9_-]") +_SANITIZE_LABEL_LENGTH_LIMIT = 63 +# TODO: extend this function to check length is <= 63 characters def _sanitize_label(value: str) -> str: """Return a legal value for a BigQuery label.""" value = value.strip().lower() value = _SANITIZE_LABEL_PATTERN.sub("_", value) + # TODO: add assert for length + value_length = len(value) + assert value_length <= _SANITIZE_LABEL_LENGTH_LIMIT return value diff --git a/test/unit/test_bigquery_adapter.py b/test/unit/test_bigquery_adapter.py index 5f7d79054e6..45a2fc8086e 100644 --- a/test/unit/test_bigquery_adapter.py +++ b/test/unit/test_bigquery_adapter.py @@ -1,6 +1,8 @@ import agate import decimal import json +import string +import random import re import pytest import unittest @@ -972,3 +974,14 @@ def test_convert_time_type(self): ) def test_sanitize_label(input, output): assert _sanitize_label(input) == output + + +# TODO: test strings that have verified lengths of 63 and greater and +# TODO assert that I get the proper error message that the comment string is too long +# TODO: think of using a random(63) function +@pytest.mark.parametrize(N=63) +def test_sanitize_label_length(N): + random_string = "".join( + random.choice(string.ascii_uppercase + string.digits) for i in range(N) + ) + assert _sanitize_label(random_string) == AssertionError From f88abfdcc7ac77b4290a1a502a34fa07d27cda60 Mon Sep 17 00:00:00 2001 From: Sung Won Chung Date: Thu, 5 Aug 2021 16:05:34 -0500 Subject: [PATCH 02/13] revert to previous version --- plugins/bigquery/dbt/adapters/bigquery/connections.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/plugins/bigquery/dbt/adapters/bigquery/connections.py b/plugins/bigquery/dbt/adapters/bigquery/connections.py index 2dcecb37f75..34d7f0e14ee 100644 --- a/plugins/bigquery/dbt/adapters/bigquery/connections.py +++ b/plugins/bigquery/dbt/adapters/bigquery/connections.py @@ -597,12 +597,8 @@ def _is_retryable(error): _SANITIZE_LABEL_LENGTH_LIMIT = 63 -# TODO: extend this function to check length is <= 63 characters def _sanitize_label(value: str) -> str: """Return a legal value for a BigQuery label.""" value = value.strip().lower() value = _SANITIZE_LABEL_PATTERN.sub("_", value) - # TODO: add assert for length - value_length = len(value) - assert value_length <= _SANITIZE_LABEL_LENGTH_LIMIT return value From a73a881494a60aa7bdcd18361cb374740b293477 Mon Sep 17 00:00:00 2001 From: Sung Won Chung Date: Thu, 5 Aug 2021 16:32:01 -0500 Subject: [PATCH 03/13] intentionally failing test --- .../bigquery/dbt/adapters/bigquery/connections.py | 12 ++++++++++++ test/unit/test_bigquery_adapter.py | 9 +++++---- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/plugins/bigquery/dbt/adapters/bigquery/connections.py b/plugins/bigquery/dbt/adapters/bigquery/connections.py index 34d7f0e14ee..e42575b771c 100644 --- a/plugins/bigquery/dbt/adapters/bigquery/connections.py +++ b/plugins/bigquery/dbt/adapters/bigquery/connections.py @@ -602,3 +602,15 @@ def _sanitize_label(value: str) -> str: value = value.strip().lower() value = _SANITIZE_LABEL_PATTERN.sub("_", value) return value + + +def _validate_label_length(value: str) -> None: + try: + value_length = len(value) + assert value_length <= _VALIDATE_LABEL_LENGTH_LIMIT + except AssertionError as e: + error_msg = ( + f"Label is greater than length limit: {_VALIDATE_LABEL_LENGTH_LIMIT}" + ) + e.args += (error_msg, "") + raise e diff --git a/test/unit/test_bigquery_adapter.py b/test/unit/test_bigquery_adapter.py index 45a2fc8086e..87a695a53e7 100644 --- a/test/unit/test_bigquery_adapter.py +++ b/test/unit/test_bigquery_adapter.py @@ -979,9 +979,10 @@ def test_sanitize_label(input, output): # TODO: test strings that have verified lengths of 63 and greater and # TODO assert that I get the proper error message that the comment string is too long # TODO: think of using a random(63) function -@pytest.mark.parametrize(N=63) -def test_sanitize_label_length(N): +def test_validate_label_length(): random_string = "".join( - random.choice(string.ascii_uppercase + string.digits) for i in range(N) + random.choice(string.ascii_uppercase + string.digits) for i in range(64) ) - assert _sanitize_label(random_string) == AssertionError + with pytest.raises(AssertionError) as error_info: + _validate_label_length(random_string) + assert error_info.value.args[0] == f"Label is greater than length limit: 64" From 6d4f6d61a7ecafc8527ce5f290862abc818c57eb Mon Sep 17 00:00:00 2001 From: Sung Won Chung Date: Thu, 5 Aug 2021 16:32:29 -0500 Subject: [PATCH 04/13] add imports --- plugins/bigquery/dbt/adapters/bigquery/connections.py | 3 ++- test/unit/test_bigquery_adapter.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/plugins/bigquery/dbt/adapters/bigquery/connections.py b/plugins/bigquery/dbt/adapters/bigquery/connections.py index e42575b771c..104d91ade4e 100644 --- a/plugins/bigquery/dbt/adapters/bigquery/connections.py +++ b/plugins/bigquery/dbt/adapters/bigquery/connections.py @@ -595,7 +595,8 @@ def _is_retryable(error): _SANITIZE_LABEL_PATTERN = re.compile(r"[^a-z0-9_-]") -_SANITIZE_LABEL_LENGTH_LIMIT = 63 +_VALIDATE_LABEL_LENGTH_LIMIT = 63 + def _sanitize_label(value: str) -> str: """Return a legal value for a BigQuery label.""" diff --git a/test/unit/test_bigquery_adapter.py b/test/unit/test_bigquery_adapter.py index 87a695a53e7..8dfc99249c5 100644 --- a/test/unit/test_bigquery_adapter.py +++ b/test/unit/test_bigquery_adapter.py @@ -19,7 +19,7 @@ from dbt.adapters.bigquery import BigQueryRelation from dbt.adapters.bigquery import Plugin as BigQueryPlugin from dbt.adapters.bigquery.connections import BigQueryConnectionManager -from dbt.adapters.bigquery.connections import _sanitize_label +from dbt.adapters.bigquery.connections import _sanitize_label, _validate_label_length from dbt.adapters.base.query_headers import MacroQueryStringSetter from dbt.clients import agate_helper import dbt.exceptions From cf9b78bbc7dd195d14f49e84c68a1e5e63ea5cfa Mon Sep 17 00:00:00 2001 From: Sung Won Chung Date: Fri, 6 Aug 2021 09:19:49 -0500 Subject: [PATCH 05/13] add validation in existing function --- .../dbt/adapters/bigquery/connections.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/plugins/bigquery/dbt/adapters/bigquery/connections.py b/plugins/bigquery/dbt/adapters/bigquery/connections.py index 104d91ade4e..a9452886f14 100644 --- a/plugins/bigquery/dbt/adapters/bigquery/connections.py +++ b/plugins/bigquery/dbt/adapters/bigquery/connections.py @@ -602,16 +602,11 @@ def _sanitize_label(value: str) -> str: """Return a legal value for a BigQuery label.""" value = value.strip().lower() value = _SANITIZE_LABEL_PATTERN.sub("_", value) - return value - - -def _validate_label_length(value: str) -> None: - try: - value_length = len(value) - assert value_length <= _VALIDATE_LABEL_LENGTH_LIMIT - except AssertionError as e: + value_length = len(value) + if value_length > _VALIDATE_LABEL_LENGTH_LIMIT: error_msg = ( - f"Label is greater than length limit: {_VALIDATE_LABEL_LENGTH_LIMIT}" + f"Current label length {value_length} is greater than length limit: {_VALIDATE_LABEL_LENGTH_LIMIT}" ) - e.args += (error_msg, "") - raise e + raise Exception(error_msg) + else: + return value From 6dad2424807780f2deb770bc1459a24f26b656df Mon Sep 17 00:00:00 2001 From: Sung Won Chung Date: Fri, 6 Aug 2021 09:20:21 -0500 Subject: [PATCH 06/13] add passing test for length validation --- test/unit/test_bigquery_adapter.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/test/unit/test_bigquery_adapter.py b/test/unit/test_bigquery_adapter.py index 8dfc99249c5..1cda09aee21 100644 --- a/test/unit/test_bigquery_adapter.py +++ b/test/unit/test_bigquery_adapter.py @@ -19,7 +19,7 @@ from dbt.adapters.bigquery import BigQueryRelation from dbt.adapters.bigquery import Plugin as BigQueryPlugin from dbt.adapters.bigquery.connections import BigQueryConnectionManager -from dbt.adapters.bigquery.connections import _sanitize_label, _validate_label_length +from dbt.adapters.bigquery.connections import _sanitize_label, _VALIDATE_LABEL_LENGTH_LIMIT from dbt.adapters.base.query_headers import MacroQueryStringSetter from dbt.clients import agate_helper import dbt.exceptions @@ -976,13 +976,17 @@ def test_sanitize_label(input, output): assert _sanitize_label(input) == output -# TODO: test strings that have verified lengths of 63 and greater and -# TODO assert that I get the proper error message that the comment string is too long -# TODO: think of using a random(63) function -def test_validate_label_length(): +@pytest.mark.parametrize( + "label_length", + [64, 65, 100], +) +def test_sanitize_label_length(label_length): + test_error_msg = ( + f"Current label length {label_length} is greater than length limit: {_VALIDATE_LABEL_LENGTH_LIMIT}" + ) random_string = "".join( - random.choice(string.ascii_uppercase + string.digits) for i in range(64) + random.choice(string.ascii_uppercase + string.digits) for i in range(label_length) ) - with pytest.raises(AssertionError) as error_info: - _validate_label_length(random_string) - assert error_info.value.args[0] == f"Label is greater than length limit: 64" + with pytest.raises(Exception) as error_info: + _sanitize_label(random_string) + assert error_info.value.args[0] == test_error_msg From dde0c6eb2f325c3267a45484f8874306252f9710 Mon Sep 17 00:00:00 2001 From: Sung Won Chung Date: Fri, 6 Aug 2021 09:50:36 -0500 Subject: [PATCH 07/13] add current sanitized label --- plugins/bigquery/dbt/adapters/bigquery/connections.py | 2 +- test/unit/test_bigquery_adapter.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/plugins/bigquery/dbt/adapters/bigquery/connections.py b/plugins/bigquery/dbt/adapters/bigquery/connections.py index a9452886f14..bf21b1bafd0 100644 --- a/plugins/bigquery/dbt/adapters/bigquery/connections.py +++ b/plugins/bigquery/dbt/adapters/bigquery/connections.py @@ -605,7 +605,7 @@ def _sanitize_label(value: str) -> str: value_length = len(value) if value_length > _VALIDATE_LABEL_LENGTH_LIMIT: error_msg = ( - f"Current label length {value_length} is greater than length limit: {_VALIDATE_LABEL_LENGTH_LIMIT}" + f"Current label length {value_length} is greater than length limit: {_VALIDATE_LABEL_LENGTH_LIMIT} | Current sanitized label: {value}" ) raise Exception(error_msg) else: diff --git a/test/unit/test_bigquery_adapter.py b/test/unit/test_bigquery_adapter.py index 1cda09aee21..dc3290a3035 100644 --- a/test/unit/test_bigquery_adapter.py +++ b/test/unit/test_bigquery_adapter.py @@ -985,8 +985,10 @@ def test_sanitize_label_length(label_length): f"Current label length {label_length} is greater than length limit: {_VALIDATE_LABEL_LENGTH_LIMIT}" ) random_string = "".join( - random.choice(string.ascii_uppercase + string.digits) for i in range(label_length) + random.choice(string.ascii_uppercase + string.digits) + for i in range(label_length) ) + test_error_msg = f"Current label length {label_length} is greater than length limit: {_VALIDATE_LABEL_LENGTH_LIMIT} | Current sanitized label: {random_string.lower()}" with pytest.raises(Exception) as error_info: - _sanitize_label(random_string) + _sanitize_label(random_string) assert error_info.value.args[0] == test_error_msg From 0bbb8b1c2fcc0f270815c248e1452368db18e164 Mon Sep 17 00:00:00 2001 From: Sung Won Chung Date: Fri, 6 Aug 2021 09:52:37 -0500 Subject: [PATCH 08/13] remove duplicate var --- test/unit/test_bigquery_adapter.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/test/unit/test_bigquery_adapter.py b/test/unit/test_bigquery_adapter.py index dc3290a3035..cc9de006199 100644 --- a/test/unit/test_bigquery_adapter.py +++ b/test/unit/test_bigquery_adapter.py @@ -981,9 +981,6 @@ def test_sanitize_label(input, output): [64, 65, 100], ) def test_sanitize_label_length(label_length): - test_error_msg = ( - f"Current label length {label_length} is greater than length limit: {_VALIDATE_LABEL_LENGTH_LIMIT}" - ) random_string = "".join( random.choice(string.ascii_uppercase + string.digits) for i in range(label_length) From 4dffc0075c3d7b492626a49d20f4cc6c7da35bb0 Mon Sep 17 00:00:00 2001 From: sungchun12 Date: Mon, 16 Aug 2021 08:54:19 -0500 Subject: [PATCH 09/13] Make logging output 2 lines Co-authored-by: Jeremy Cohen --- plugins/bigquery/dbt/adapters/bigquery/connections.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/plugins/bigquery/dbt/adapters/bigquery/connections.py b/plugins/bigquery/dbt/adapters/bigquery/connections.py index bf21b1bafd0..1f91c93c91d 100644 --- a/plugins/bigquery/dbt/adapters/bigquery/connections.py +++ b/plugins/bigquery/dbt/adapters/bigquery/connections.py @@ -605,7 +605,9 @@ def _sanitize_label(value: str) -> str: value_length = len(value) if value_length > _VALIDATE_LABEL_LENGTH_LIMIT: error_msg = ( - f"Current label length {value_length} is greater than length limit: {_VALIDATE_LABEL_LENGTH_LIMIT} | Current sanitized label: {value}" + f"Job label length {value_length} is greater than length limit: " \ + f"{_VALIDATE_LABEL_LENGTH_LIMIT}\n" \ + f"Current sanitized label: {value}" ) raise Exception(error_msg) else: From 88a3fb3a8d1432bb7e544b7841b047a8de090f78 Mon Sep 17 00:00:00 2001 From: sungchun12 Date: Mon, 16 Aug 2021 08:55:53 -0500 Subject: [PATCH 10/13] Raise RuntimeException to better handle error Co-authored-by: Jeremy Cohen --- plugins/bigquery/dbt/adapters/bigquery/connections.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/bigquery/dbt/adapters/bigquery/connections.py b/plugins/bigquery/dbt/adapters/bigquery/connections.py index 1f91c93c91d..7042346cd79 100644 --- a/plugins/bigquery/dbt/adapters/bigquery/connections.py +++ b/plugins/bigquery/dbt/adapters/bigquery/connections.py @@ -609,6 +609,6 @@ def _sanitize_label(value: str) -> str: f"{_VALIDATE_LABEL_LENGTH_LIMIT}\n" \ f"Current sanitized label: {value}" ) - raise Exception(error_msg) + raise RuntimeException(error_msg) else: return value From b77cf06ab6eff41eab5d933344e154ae7ba6dc67 Mon Sep 17 00:00:00 2001 From: Sung Won Chung Date: Mon, 16 Aug 2021 09:04:15 -0500 Subject: [PATCH 11/13] update test --- test/unit/test_bigquery_adapter.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/test/unit/test_bigquery_adapter.py b/test/unit/test_bigquery_adapter.py index cc9de006199..ecec53a61de 100644 --- a/test/unit/test_bigquery_adapter.py +++ b/test/unit/test_bigquery_adapter.py @@ -985,7 +985,11 @@ def test_sanitize_label_length(label_length): random.choice(string.ascii_uppercase + string.digits) for i in range(label_length) ) - test_error_msg = f"Current label length {label_length} is greater than length limit: {_VALIDATE_LABEL_LENGTH_LIMIT} | Current sanitized label: {random_string.lower()}" - with pytest.raises(Exception) as error_info: + test_error_msg = ( + f"Job label length {label_length} is greater than length limit: " \ + f"{_VALIDATE_LABEL_LENGTH_LIMIT}\n" \ + f"Current sanitized label: {random_string.lower()}" + ) + with pytest.raises(dbt.exceptions.RuntimeException) as error_info: _sanitize_label(random_string) assert error_info.value.args[0] == test_error_msg From 846703e4746cf0857b90b5ac4f56a20420430be6 Mon Sep 17 00:00:00 2001 From: Sung Won Chung Date: Mon, 16 Aug 2021 09:16:32 -0500 Subject: [PATCH 12/13] fix flake8 errors --- plugins/bigquery/dbt/adapters/bigquery/connections.py | 4 ++-- test/unit/test_bigquery_adapter.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/plugins/bigquery/dbt/adapters/bigquery/connections.py b/plugins/bigquery/dbt/adapters/bigquery/connections.py index 7042346cd79..62de829b23b 100644 --- a/plugins/bigquery/dbt/adapters/bigquery/connections.py +++ b/plugins/bigquery/dbt/adapters/bigquery/connections.py @@ -605,8 +605,8 @@ def _sanitize_label(value: str) -> str: value_length = len(value) if value_length > _VALIDATE_LABEL_LENGTH_LIMIT: error_msg = ( - f"Job label length {value_length} is greater than length limit: " \ - f"{_VALIDATE_LABEL_LENGTH_LIMIT}\n" \ + f"Job label length {value_length} is greater than length limit: " + f"{_VALIDATE_LABEL_LENGTH_LIMIT}\n" f"Current sanitized label: {value}" ) raise RuntimeException(error_msg) diff --git a/test/unit/test_bigquery_adapter.py b/test/unit/test_bigquery_adapter.py index ecec53a61de..147b8cba0df 100644 --- a/test/unit/test_bigquery_adapter.py +++ b/test/unit/test_bigquery_adapter.py @@ -986,8 +986,8 @@ def test_sanitize_label_length(label_length): for i in range(label_length) ) test_error_msg = ( - f"Job label length {label_length} is greater than length limit: " \ - f"{_VALIDATE_LABEL_LENGTH_LIMIT}\n" \ + f"Job label length {label_length} is greater than length limit: " + f"{_VALIDATE_LABEL_LENGTH_LIMIT}\n" f"Current sanitized label: {random_string.lower()}" ) with pytest.raises(dbt.exceptions.RuntimeException) as error_info: From 79179363c9f3843f830ccdc1e5588b5217fa7321 Mon Sep 17 00:00:00 2001 From: Sung Won Chung Date: Mon, 16 Aug 2021 09:17:39 -0500 Subject: [PATCH 13/13] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 16a21a1696c..c1c07e82ada 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -61,6 +61,7 @@ Contributors: ### Under the hood - Switch to full reparse on partial parsing exceptions. Log and report exception information. ([#3725](https://github.com/dbt-labs/dbt/issues/3725), [#3733](https://github.com/dbt-labs/dbt/pull/3733)) - Check for existence of test node when removing. ([#3711](https://github.com/dbt-labs/dbt/issues/3711), [#3750](https://github.com/dbt-labs/dbt/pull/3750)) +- Better error handling for BigQuery job labels that are too long. [#3703](https://github.com/dbt-labs/dbt/pull/3703) ## dbt 0.20.1 (August 11, 2021)