From cf64140fb84f98b926369d74990dbb10cc11d29a Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 25 Jul 2019 16:43:22 +0200 Subject: [PATCH 1/6] Add StandardSqlDataTypes enum to BigQuery This is a convenience enum that contains scalar SQL data type constants (a subset of types defined in the gapic enum generated from the .proto definitions). --- bigquery/google/cloud/bigquery/__init__.py | 2 + bigquery/google/cloud/bigquery/enums.py | 84 +++++++++++++++++++++ bigquery/tests/unit/test_enums.py | 85 ++++++++++++++++++++++ 3 files changed, 171 insertions(+) create mode 100644 bigquery/google/cloud/bigquery/enums.py create mode 100644 bigquery/tests/unit/test_enums.py diff --git a/bigquery/google/cloud/bigquery/__init__.py b/bigquery/google/cloud/bigquery/__init__.py index b84051fc6be1..c41ceb6b0306 100644 --- a/bigquery/google/cloud/bigquery/__init__.py +++ b/bigquery/google/cloud/bigquery/__init__.py @@ -36,6 +36,7 @@ from google.cloud.bigquery.dataset import AccessEntry from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetReference +from google.cloud.bigquery.enums import StandardSqlDataTypes from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery.external_config import BigtableOptions from google.cloud.bigquery.external_config import BigtableColumnFamily @@ -130,6 +131,7 @@ "Encoding", "QueryPriority", "SchemaUpdateOption", + "StandardSqlDataTypes", "SourceFormat", "WriteDisposition", ] diff --git a/bigquery/google/cloud/bigquery/enums.py b/bigquery/google/cloud/bigquery/enums.py new file mode 100644 index 000000000000..f35e3fda3340 --- /dev/null +++ b/bigquery/google/cloud/bigquery/enums.py @@ -0,0 +1,84 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import re +import warnings + +import enum +import six + +from google.cloud.bigquery_v2.gapic import enums as gapic_enums + + +def _make_sql_scalars_enum(): + """Create an enum based on a gapic enum containing only SQL scalar types.""" + sql_scalar_types = frozenset( + ( + "INT64", + "BOOL", + "FLOAT64", + "STRING", + "BYTES", + "TIMESTAMP", + "DATE", + "TIME", + "DATETIME", + "NUMERIC", + ) + ) + excluded_members = frozenset( + ("TYPE_KIND_UNSPECIFIED", "GEOGRAPHY", "ARRAY", "STRUCT") + ) + + # Sanity check - we do not want the new enum to go out of sync with the original + # enum from gapic. + # ASSUMPTION: No existing types are ever renamed or deleted, we only try to + # detect cases when new types are introduced. + gapic_names = set(m.name for m in gapic_enums.StandardSqlDataType.TypeKind) + anticipated_names = sql_scalar_types | excluded_members + unhandled_names = gapic_names - anticipated_names + + if unhandled_names: + msg = ( + "The StandardSqlDataTypes enum migh be out of sync with the " + "original StandardSqlDataType.TypeKind enum from gapic. 
Check " + "enum members: {}".format(", ".join(unhandled_names)) + ) + warnings.warn(msg, UserWarning) + + new_enum = enum.Enum( + "StandardSqlDataTypes", + ( + (member.name, member.value) + for member in gapic_enums.StandardSqlDataType.TypeKind + if member.name in sql_scalar_types + ), + ) + + # make sure the docstring for the new enum is also correct + orig_doc = gapic_enums.StandardSqlDataType.TypeKind.__doc__ + skip_pattern = re.compile( + "|".join(excluded_members) + + "|because a JSON object" # the second description line of STRUCT member + ) + + new_doc = "\n".join( + six.moves.filterfalse(skip_pattern.search, orig_doc.splitlines()) + ) + new_enum.__doc__ = new_doc + + return new_enum + + +StandardSqlDataTypes = _make_sql_scalars_enum() diff --git a/bigquery/tests/unit/test_enums.py b/bigquery/tests/unit/test_enums.py new file mode 100644 index 000000000000..0e104d53ee8a --- /dev/null +++ b/bigquery/tests/unit/test_enums.py @@ -0,0 +1,85 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import enum +import mock +import pytest +import six + + +@pytest.fixture +def enum_under_test(): + from google.cloud.bigquery.enums import StandardSqlDataTypes + + return StandardSqlDataTypes + + +@pytest.fixture +def gapic_enum(): + """The referential autogenerated enum the enum under test is based on.""" + from google.cloud.bigquery_v2.gapic.enums import StandardSqlDataType + + return StandardSqlDataType.TypeKind + + +def test_standard_sql_types_enum_members(enum_under_test, gapic_enum): + # check the presence of a few typical SQL types + for name in ("INT64", "FLOAT64", "DATE", "BOOL"): + assert name in enum_under_test.__members__ + + # the enum members must match those in the original gapic enum + for member in enum_under_test: + assert member.name in gapic_enum.__members__ + assert member.value == gapic_enum[member.name].value + + # check a few members that should *not* be copied over from the gapic enum + for name in ("GEOGRAPHY", "ARRAY"): + assert name in gapic_enum.__members__ + assert name not in enum_under_test.__members__ + + +def test_standard_sql_types_enum_docstring(enum_under_test, gapic_enum): + assert "STRUCT (int):" not in enum_under_test.__doc__ + assert "BOOL (int):" in enum_under_test.__doc__ + assert "TIME (int):" in enum_under_test.__doc__ + + # all lines in the docstring should actually come from the original docstring + doc_lines = enum_under_test.__doc__.splitlines() + assert set(doc_lines) <= set(gapic_enum.__doc__.splitlines()) + + +def test_standard_sql_types_enum_warning_on_new_added_types(gapic_enum): + class ReplacementEnum(enum.IntEnum): + """Fake enum with some new database types.""" + + INT64 = 2 + TIMESTAMP = 19 + TROOLEAN = 911 # One of {True, False, FileNotFound}. (OMG, help!) 
+ + gapic_enum_patch = mock.patch( + "google.cloud.bigquery_v2.gapic.enums.StandardSqlDataType.TypeKind", + new=ReplacementEnum, + ) + + from google.cloud.bigquery import enums + + with pytest.warns(UserWarning) as warn_record, gapic_enum_patch: + enums = six.moves.reload_module(enums) + + try: + warning_msg = str(warn_record[0].message) + assert "StandardSqlDataTypes" in warning_msg + assert "out of sync" in warning_msg + finally: + six.moves.reload_module(enums) # regenerate enum with original gapic enum From 84f167eb1b5cbbc7e4b76a1a400f022c1cc64468 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 26 Jul 2019 18:14:30 +0200 Subject: [PATCH 2/6] Move StandardSqlDataTypes tests under enums/ dir --- bigquery/tests/unit/enums/__init__.py | 13 +++++++++++++ .../test_standard_sql_data_types.py} | 0 2 files changed, 13 insertions(+) create mode 100644 bigquery/tests/unit/enums/__init__.py rename bigquery/tests/unit/{test_enums.py => enums/test_standard_sql_data_types.py} (100%) diff --git a/bigquery/tests/unit/enums/__init__.py b/bigquery/tests/unit/enums/__init__.py new file mode 100644 index 000000000000..c5cce043083c --- /dev/null +++ b/bigquery/tests/unit/enums/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2019, Google LLC All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/bigquery/tests/unit/test_enums.py b/bigquery/tests/unit/enums/test_standard_sql_data_types.py similarity index 100% rename from bigquery/tests/unit/test_enums.py rename to bigquery/tests/unit/enums/test_standard_sql_data_types.py From 70292d873f1ab69504fa15f237c0a697a66dceed Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 26 Jul 2019 18:20:46 +0200 Subject: [PATCH 3/6] Treat GEOGRAPHY as scalar SQL type in enum --- bigquery/google/cloud/bigquery/enums.py | 5 ++--- bigquery/tests/unit/enums/test_standard_sql_data_types.py | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/bigquery/google/cloud/bigquery/enums.py b/bigquery/google/cloud/bigquery/enums.py index f35e3fda3340..c5bdfb86a3ed 100644 --- a/bigquery/google/cloud/bigquery/enums.py +++ b/bigquery/google/cloud/bigquery/enums.py @@ -34,12 +34,11 @@ def _make_sql_scalars_enum(): "DATE", "TIME", "DATETIME", + "GEOGRAPHY", "NUMERIC", ) ) - excluded_members = frozenset( - ("TYPE_KIND_UNSPECIFIED", "GEOGRAPHY", "ARRAY", "STRUCT") - ) + excluded_members = frozenset(("TYPE_KIND_UNSPECIFIED", "ARRAY", "STRUCT")) # Sanity check - we do not want the new enum to go out of sync with the original # enum from gapic. 
diff --git a/bigquery/tests/unit/enums/test_standard_sql_data_types.py b/bigquery/tests/unit/enums/test_standard_sql_data_types.py index 0e104d53ee8a..57b81bb42089 100644 --- a/bigquery/tests/unit/enums/test_standard_sql_data_types.py +++ b/bigquery/tests/unit/enums/test_standard_sql_data_types.py @@ -35,7 +35,7 @@ def gapic_enum(): def test_standard_sql_types_enum_members(enum_under_test, gapic_enum): # check the presence of a few typical SQL types - for name in ("INT64", "FLOAT64", "DATE", "BOOL"): + for name in ("INT64", "FLOAT64", "DATE", "BOOL", "GEOGRAPHY"): assert name in enum_under_test.__members__ # the enum members must match those in the original gapic enum @@ -44,7 +44,7 @@ def test_standard_sql_types_enum_members(enum_under_test, gapic_enum): assert member.value == gapic_enum[member.name].value # check a few members that should *not* be copied over from the gapic enum - for name in ("GEOGRAPHY", "ARRAY"): + for name in ("STRUCT", "ARRAY"): assert name in gapic_enum.__members__ assert name not in enum_under_test.__members__ From 4da6e7880c448c502f5e73ed651d3474c36eb7cd Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 26 Jul 2019 18:34:13 +0200 Subject: [PATCH 4/6] Use more descriptive name in generator expression --- bigquery/google/cloud/bigquery/enums.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigquery/google/cloud/bigquery/enums.py b/bigquery/google/cloud/bigquery/enums.py index c5bdfb86a3ed..155cc3873dd5 100644 --- a/bigquery/google/cloud/bigquery/enums.py +++ b/bigquery/google/cloud/bigquery/enums.py @@ -44,7 +44,7 @@ def _make_sql_scalars_enum(): # enum from gapic. # ASSUMPTION: No existing types are ever renamed or deleted, we only try to # detect cases when new types are introduced. 
- gapic_names = set(m.name for m in gapic_enums.StandardSqlDataType.TypeKind) + gapic_names = set(type_.name for type_ in gapic_enums.StandardSqlDataType.TypeKind) anticipated_names = sql_scalar_types | excluded_members unhandled_names = gapic_names - anticipated_names From 95c13df1355515ee13ccf599415bb992059719d9 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 26 Jul 2019 19:07:24 +0200 Subject: [PATCH 5/6] Replace enum out of sync warning with a loud test --- bigquery/google/cloud/bigquery/enums.py | 54 +++++++------------ .../enums/test_standard_sql_data_types.py | 44 ++++++--------- 2 files changed, 35 insertions(+), 63 deletions(-) diff --git a/bigquery/google/cloud/bigquery/enums.py b/bigquery/google/cloud/bigquery/enums.py index 155cc3873dd5..96ac46be304e 100644 --- a/bigquery/google/cloud/bigquery/enums.py +++ b/bigquery/google/cloud/bigquery/enums.py @@ -13,7 +13,6 @@ # limitations under the License. import re -import warnings import enum import six @@ -21,54 +20,41 @@ from google.cloud.bigquery_v2.gapic import enums as gapic_enums -def _make_sql_scalars_enum(): - """Create an enum based on a gapic enum containing only SQL scalar types.""" - sql_scalar_types = frozenset( - ( - "INT64", - "BOOL", - "FLOAT64", - "STRING", - "BYTES", - "TIMESTAMP", - "DATE", - "TIME", - "DATETIME", - "GEOGRAPHY", - "NUMERIC", - ) +_SQL_SCALAR_TYPES = frozenset( + ( + "INT64", + "BOOL", + "FLOAT64", + "STRING", + "BYTES", + "TIMESTAMP", + "DATE", + "TIME", + "DATETIME", + "GEOGRAPHY", + "NUMERIC", ) - excluded_members = frozenset(("TYPE_KIND_UNSPECIFIED", "ARRAY", "STRUCT")) +) - # Sanity check - we do not want the new enum to go out of sync with the original - # enum from gapic. - # ASSUMPTION: No existing types are ever renamed or deleted, we only try to - # detect cases when new types are introduced. 
- gapic_names = set(type_.name for type_ in gapic_enums.StandardSqlDataType.TypeKind) - anticipated_names = sql_scalar_types | excluded_members - unhandled_names = gapic_names - anticipated_names +_SQL_NONSCALAR_TYPES = frozenset(("TYPE_KIND_UNSPECIFIED", "ARRAY", "STRUCT")) - if unhandled_names: - msg = ( - "The StandardSqlDataTypes enum migh be out of sync with the " - "original StandardSqlDataType.TypeKind enum from gapic. Check " - "enum members: {}".format(", ".join(unhandled_names)) - ) - warnings.warn(msg, UserWarning) + +def _make_sql_scalars_enum(): + """Create an enum based on a gapic enum containing only SQL scalar types.""" new_enum = enum.Enum( "StandardSqlDataTypes", ( (member.name, member.value) for member in gapic_enums.StandardSqlDataType.TypeKind - if member.name in sql_scalar_types + if member.name in _SQL_SCALAR_TYPES ), ) # make sure the docstring for the new enum is also correct orig_doc = gapic_enums.StandardSqlDataType.TypeKind.__doc__ skip_pattern = re.compile( - "|".join(excluded_members) + "|".join(_SQL_NONSCALAR_TYPES) + "|because a JSON object" # the second description line of STRUCT member ) diff --git a/bigquery/tests/unit/enums/test_standard_sql_data_types.py b/bigquery/tests/unit/enums/test_standard_sql_data_types.py index 57b81bb42089..6fab79b218d8 100644 --- a/bigquery/tests/unit/enums/test_standard_sql_data_types.py +++ b/bigquery/tests/unit/enums/test_standard_sql_data_types.py @@ -12,10 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import enum -import mock import pytest -import six + + +@pytest.fixture +def module_under_test(): + from google.cloud.bigquery import enums + + return enums @pytest.fixture @@ -33,6 +37,14 @@ def gapic_enum(): return StandardSqlDataType.TypeKind +def test_all_gapic_enum_members_are_known(module_under_test, gapic_enum): + gapic_names = set(type_.name for type_ in gapic_enum) + anticipated_names = ( + module_under_test._SQL_SCALAR_TYPES | module_under_test._SQL_NONSCALAR_TYPES + ) + assert not (gapic_names - anticipated_names) # no unhandled names + + def test_standard_sql_types_enum_members(enum_under_test, gapic_enum): # check the presence of a few typical SQL types for name in ("INT64", "FLOAT64", "DATE", "BOOL", "GEOGRAPHY"): @@ -57,29 +69,3 @@ def test_standard_sql_types_enum_docstring(enum_under_test, gapic_enum): # all lines in the docstring should actually come from the original docstring doc_lines = enum_under_test.__doc__.splitlines() assert set(doc_lines) <= set(gapic_enum.__doc__.splitlines()) - - -def test_standard_sql_types_enum_warning_on_new_added_types(gapic_enum): - class ReplacementEnum(enum.IntEnum): - """Fake enum with some new database types.""" - - INT64 = 2 - TIMESTAMP = 19 - TROOLEAN = 911 # One of {True, False, FileNotFound}. (OMG, help!) 
- - gapic_enum_patch = mock.patch( - "google.cloud.bigquery_v2.gapic.enums.StandardSqlDataType.TypeKind", - new=ReplacementEnum, - ) - - from google.cloud.bigquery import enums - - with pytest.warns(UserWarning) as warn_record, gapic_enum_patch: - enums = six.moves.reload_module(enums) - - try: - warning_msg = str(warn_record[0].message) - assert "StandardSqlDataTypes" in warning_msg - assert "out of sync" in warning_msg - finally: - six.moves.reload_module(enums) # regenerate enum with original gapic enum From 4ebdd9116ff9ff745bf5fea69eb1cabc59be0bd1 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Mon, 29 Jul 2019 11:18:56 +0200 Subject: [PATCH 6/6] Add Enums section to BigQuery API reference docs --- bigquery/docs/reference.rst | 10 ++++++++++ bigquery/google/cloud/bigquery/enums.py | 2 +- .../tests/unit/enums/test_standard_sql_data_types.py | 6 ++++-- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/bigquery/docs/reference.rst b/bigquery/docs/reference.rst index 39b3e8407d30..e01443808795 100644 --- a/bigquery/docs/reference.rst +++ b/bigquery/docs/reference.rst @@ -164,6 +164,16 @@ Magics magics + +Enums +===== + +.. 
autosummary:: + :toctree: generated + + enums.StandardSqlDataTypes + + Additional Types ================ diff --git a/bigquery/google/cloud/bigquery/enums.py b/bigquery/google/cloud/bigquery/enums.py index 96ac46be304e..098a918e474f 100644 --- a/bigquery/google/cloud/bigquery/enums.py +++ b/bigquery/google/cloud/bigquery/enums.py @@ -61,7 +61,7 @@ def _make_sql_scalars_enum(): new_doc = "\n".join( six.moves.filterfalse(skip_pattern.search, orig_doc.splitlines()) ) - new_enum.__doc__ = new_doc + new_enum.__doc__ = "An Enum of scalar SQL types.\n" + new_doc return new_enum diff --git a/bigquery/tests/unit/enums/test_standard_sql_data_types.py b/bigquery/tests/unit/enums/test_standard_sql_data_types.py index 6fab79b218d8..6fa4f057fb98 100644 --- a/bigquery/tests/unit/enums/test_standard_sql_data_types.py +++ b/bigquery/tests/unit/enums/test_standard_sql_data_types.py @@ -66,6 +66,8 @@ def test_standard_sql_types_enum_docstring(enum_under_test, gapic_enum): assert "BOOL (int):" in enum_under_test.__doc__ assert "TIME (int):" in enum_under_test.__doc__ - # all lines in the docstring should actually come from the original docstring + # All lines in the docstring should actually come from the original docstring, + # except for the header. + assert "An Enum of scalar SQL types." in enum_under_test.__doc__ doc_lines = enum_under_test.__doc__.splitlines() - assert set(doc_lines) <= set(gapic_enum.__doc__.splitlines()) + assert set(doc_lines[1:]) <= set(gapic_enum.__doc__.splitlines())