From 5f667413139e7344c0271401017b4d52e473e148 Mon Sep 17 00:00:00 2001 From: Sumedh Sakdeo Date: Tue, 14 Aug 2018 16:40:35 -0700 Subject: [PATCH 1/3] Fetch a batch of rows from bigquery --- superset/db_engine_specs.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/superset/db_engine_specs.py b/superset/db_engine_specs.py index c07910af4d2fd..9d58769d6783b 100644 --- a/superset/db_engine_specs.py +++ b/superset/db_engine_specs.py @@ -1367,6 +1367,15 @@ class BQEngineSpec(BaseEngineSpec): As contributed by @mxmzdlv on issue #945""" engine = 'bigquery' + """ + https://www.python.org/dev/peps/pep-0249/#arraysize + raw_connections bypass the pybigquery query execution context and deal with + raw dbapi connection directly. + If this value is not set, the default value is set to 1, as described here, + https://googlecloudplatform.github.io/google-cloud-python/latest/_modules/google/cloud/bigquery/dbapi/cursor.html#Cursor + """ + arraysize = 5000 + time_grain_functions = { None: '{col}', 'PT1S': 'TIMESTAMP_TRUNC({col}, SECOND)', @@ -1388,6 +1397,7 @@ def convert_dttm(cls, target_type, dttm): @classmethod def fetch_data(cls, cursor, limit): + cursor.arraysize = 5000 data = super(BQEngineSpec, cls).fetch_data(cursor, limit) if len(data) != 0 and type(data[0]).__name__ == 'Row': data = [r.values() for r in data] From ae7afb5d52732ed98d4ba28d01aeb9abaabc2c88 Mon Sep 17 00:00:00 2001 From: Sumedh Sakdeo Date: Tue, 14 Aug 2018 17:40:08 -0700 Subject: [PATCH 2/3] unused const --- superset/db_engine_specs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/superset/db_engine_specs.py b/superset/db_engine_specs.py index 9d58769d6783b..010342a134d06 100644 --- a/superset/db_engine_specs.py +++ b/superset/db_engine_specs.py @@ -1397,7 +1397,7 @@ def convert_dttm(cls, target_type, dttm): @classmethod def fetch_data(cls, cursor, limit): - cursor.arraysize = 5000 + cursor.arraysize = BQEngineSpec.arraysize data = super(BQEngineSpec, cls).fetch_data(cursor, limit) if len(data) != 0 and type(data[0]).__name__ == 'Row': data = [r.values() for r in data] From 18c48ff07d90f3d6ab6f4219b119102426112831 Mon Sep 17 00:00:00 2001 From: Sumedh Sakdeo Date: Tue, 14 Aug 2018 18:07:04 -0700 Subject: [PATCH 3/3] review comments --- superset/db_engine_specs.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/superset/db_engine_specs.py b/superset/db_engine_specs.py index 010342a134d06..65289e1e19c44 100644 --- a/superset/db_engine_specs.py +++ b/superset/db_engine_specs.py @@ -102,6 +102,7 @@ class BaseEngineSpec(object): inner_joins = True allows_subquery = True consistent_case_sensitivity = True # do results have same case as qry for col names? + arraysize = None @classmethod def get_time_grains(cls): @@ -115,6 +116,8 @@ def get_time_grains(cls): @classmethod def fetch_data(cls, cursor, limit): + if cls.arraysize: + cursor.arraysize = cls.arraysize if cls.limit_method == LimitMethod.FETCH_MANY: return cursor.fetchmany(limit) return cursor.fetchall() @@ -1373,6 +1376,9 @@ class BQEngineSpec(BaseEngineSpec): raw dbapi connection directly. If this value is not set, the default value is set to 1, as described here, https://googlecloudplatform.github.io/google-cloud-python/latest/_modules/google/cloud/bigquery/dbapi/cursor.html#Cursor + + The default value of 5000 is derived from the pybigquery. + https://github.com/mxmzdlv/pybigquery/blob/d214bb089ca0807ca9aaa6ce4d5a01172d40264e/pybigquery/sqlalchemy_bigquery.py#L102 """ arraysize = 5000 @@ -1397,7 +1403,6 @@ def convert_dttm(cls, target_type, dttm): @classmethod def fetch_data(cls, cursor, limit): - cursor.arraysize = BQEngineSpec.arraysize data = super(BQEngineSpec, cls).fetch_data(cursor, limit) if len(data) != 0 and type(data[0]).__name__ == 'Row': data = [r.values() for r in data]