Feature/bq improvements #507

Merged: merged 9 commits on Aug 29, 2017 (changes shown from 8 commits)
121 changes: 84 additions & 37 deletions dbt/adapters/bigquery.py
@@ -16,16 +16,23 @@
 import google.cloud.exceptions
 import google.cloud.bigquery

+import time
+import uuid
+

 class BigQueryAdapter(PostgresAdapter):

     context_functions = [
         "query_for_existing",
-        "drop_view",
         "execute_model",
+        "drop",
     ]

-    QUERY_TIMEOUT = 60 * 1000
+    SCOPE = ('https://www.googleapis.com/auth/bigquery',
+             'https://www.googleapis.com/auth/cloud-platform',
+             'https://www.googleapis.com/auth/drive')
+
+    QUERY_TIMEOUT = 300

     @classmethod
     def handle_error(cls, error, message, sql):
@@ -81,12 +88,12 @@ def get_bigquery_credentials(cls, config):
         creds = google.oauth2.service_account.Credentials

         if method == 'oauth':
-            credentials, project_id = google.auth.default()
+            credentials, project_id = google.auth.default(scopes=cls.SCOPE)
             return credentials

         elif method == 'service-account':
             keyfile = config.get('keyfile')
-            return creds.from_service_account_file(keyfile)
+            return creds.from_service_account_file(keyfile, scopes=cls.SCOPE)

         elif method == 'service-account-json':
             details = config.get('keyfile_json')
@@ -111,6 +118,9 @@ def open_connection(cls, connection):
         result = connection.copy()
         credentials = connection.get('credentials', {})

+        if 'timeout_seconds' in credentials:
+            cls.QUERY_TIMEOUT = credentials['timeout_seconds']
Review comment:

don't mutate global state. a better way:

  • make poll_until_job_completes take a timeout as an argument
  • then call it with poll_until_job_completes(job, timeout=connection.get('credentials', {}).get('timeout_seconds', cls.DEFAULT_QUERY_TIMEOUT)) (see the sketch after this comment)

(and remove this logic altogether)
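A minimal sketch of the suggested refactor, in the context of this module (PostgresAdapter and logger are already imported in dbt/adapters/bigquery.py; DEFAULT_QUERY_TIMEOUT is the reviewer's assumed name for an immutable class-level default):

    import time

    import dbt.exceptions


    class BigQueryAdapter(PostgresAdapter):

        DEFAULT_QUERY_TIMEOUT = 300  # seconds; never mutated at runtime

        @classmethod
        def poll_until_job_completes(cls, job, timeout):
            # Poll the async job once per second until it finishes or the
            # timeout elapses, instead of reading mutable class state.
            retry_count = timeout
            while retry_count > 0 and job.state != 'DONE':
                retry_count -= 1
                time.sleep(1)
                job.reload()

            if job.state != 'DONE':
                raise dbt.exceptions.RuntimeException(
                    "BigQuery Timeout Exceeded")
            elif job.error_result:
                raise job.exception()

    # At the call site, the per-profile override is threaded through
    # explicitly rather than stashed on the class:
    timeout = connection.get('credentials', {}).get(
        'timeout_seconds', cls.DEFAULT_QUERY_TIMEOUT)
    cls.poll_until_job_completes(job, timeout)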


+
         try:
             handle = cls.get_bigquery_client(credentials)

@@ -141,7 +151,8 @@ def query_for_existing(cls, profile, schema, model_name=None):

         relation_type_lookup = {
             'TABLE': 'table',
-            'VIEW': 'view'
+            'VIEW': 'view',
+            'EXTERNAL': 'external'
         }

         existing = [(table.name, relation_type_lookup.get(table.table_type))
@@ -150,56 +161,92 @@

         return dict(existing)

     @classmethod
-    def drop_view(cls, profile, view_name, model_name):
+    def drop(cls, profile, relation, relation_type, model_name=None):
         schema = cls.get_default_schema(profile)
         dataset = cls.get_dataset(profile, schema, model_name)
-        view = dataset.table(view_name)
-        view.delete()
+        relation_object = dataset.table(relation)
+        relation_object.delete()

     @classmethod
     def rename(cls, profile, from_name, to_name, model_name=None):
         raise dbt.exceptions.NotImplementedException(
             '`rename` is not implemented for this adapter!')

-    # Hack because of current API limitations. We should set a flag on the
-    # Table object indicating StandardSQL when it's implemented
-    # https://github.com/GoogleCloudPlatform/google-cloud-python/issues/3388
     @classmethod
-    def format_sql_for_bigquery(cls, sql):
-        return "#standardSQL\n{}".format(sql)
+    def materialize_as_view(cls, profile, dataset, model_name, model_sql):
+        view = dataset.table(model_name)
+        view.view_query = model_sql
+        view.view_use_legacy_sql = False

-    @classmethod
-    def execute_model(cls, profile, model, model_name=None):
-        connection = cls.get_connection(profile, model.get('name'))
+        logger.debug("Model SQL ({}):\n{}".format(model_name, model_sql))

-        if flags.STRICT_MODE:
-            validate_connection(connection)
+        with cls.exception_handler(profile, model_sql, model_name, model_name):
+            view.create()

-        model_name = model.get('name')
-        model_sql = cls.format_sql_for_bigquery(model.get('injected_sql'))
+        if view.created is None:
+            msg = "Error creating view {}".format(model_name)
+            raise dbt.exceptions.RuntimeException(msg)

-        materialization = dbt.utils.get_materialization(model)
-        allowed_materializations = ['view', 'ephemeral']
+        return "CREATE VIEW"

-        if materialization not in allowed_materializations:
-            msg = "Unsupported materialization: {}".format(materialization)
-            raise dbt.exceptions.RuntimeException(msg)
+    @classmethod
+    def poll_until_job_completes(cls, job):
+        retry_count = cls.QUERY_TIMEOUT
+        while retry_count > 0 and job.state != 'DONE':
+            retry_count -= 1
+            time.sleep(1)
+            job.reload()

-        schema = cls.get_default_schema(profile)
-        dataset = cls.get_dataset(profile, schema, model_name)
+        if job.state != 'DONE':
+            raise dbt.exceptions.RuntimeException("BigQuery Timeout Exceeded")

-        view = dataset.table(model_name)
-        view.view_query = model_sql
+        elif job.error_result:
+            raise job.exception()

+    @classmethod
+    def materialize_as_table(cls, profile, dataset, model_name, model_sql):
+        conn = cls.get_connection(profile, model_name)
+        client = conn.get('handle')
+
+        table = dataset.table(model_name)
+        job_id = 'dbt-create-{}-{}'.format(model_name, uuid.uuid4())
+        job = client.run_async_query(job_id, model_sql)
+        job.use_legacy_sql = False
+        job.destination = table
+        job.write_disposition = 'WRITE_TRUNCATE'
+        job.begin()

         logger.debug("Model SQL ({}):\n{}".format(model_name, model_sql))

         with cls.exception_handler(profile, model_sql, model_name, model_name):
-            view.create()
+            cls.poll_until_job_completes(job)

-        if view.created is None:
-            raise RuntimeError("Error creating view {}".format(model_name))
+        return "CREATE TABLE"

-        return "CREATE VIEW"
+    @classmethod
+    def execute_model(cls, profile, model, materialization, model_name=None):
+        connection = cls.get_connection(profile, model.get('name'))
+
+        if flags.STRICT_MODE:
+            validate_connection(connection)
+
+        model_name = model.get('name')
+        model_sql = model.get('injected_sql')
+
+        schema = cls.get_default_schema(profile)
+        dataset = cls.get_dataset(profile, schema, model_name)
+
+        if materialization == 'view':
+            res = cls.materialize_as_view(profile, dataset, model_name,
+                                          model_sql)
+        elif materialization == 'table':
+            res = cls.materialize_as_table(profile, dataset, model_name,
+                                           model_sql)
+        else:
+            msg = "Invalid relation type: '{}'".format(materialization)
+            raise dbt.exceptions.RuntimeException(msg, model)
+
+        return res

     @classmethod
     def fetch_query_results(cls, query):
@@ -220,12 +267,12 @@ def execute_and_fetch(cls, profile, sql, model_name=None, **kwargs):
         conn = cls.get_connection(profile, model_name)
         client = conn.get('handle')

-        formatted_sql = cls.format_sql_for_bigquery(sql)
-        query = client.run_sync_query(formatted_sql)
-        query.timeout_ms = cls.QUERY_TIMEOUT
+        query = client.run_sync_query(sql)
+        query.timeout_ms = cls.QUERY_TIMEOUT * 1000
Review comment:

query timeout override is not respected here (see the sketch below)
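One way the override could be honored here, sketched under the assumption that the profile's credentials dict is reachable from the connection as in open_connection above:

    # Prefer the profile's timeout_seconds over the class-level default
    # when configuring the synchronous query.
    credentials = conn.get('credentials', {})
    timeout_seconds = credentials.get('timeout_seconds', cls.QUERY_TIMEOUT)

    query = client.run_sync_query(sql)
    query.timeout_ms = timeout_seconds * 1000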

         query.use_legacy_sql = False

         debug_message = "Fetching data for query {}:\n{}"
-        logger.debug(debug_message.format(model_name, formatted_sql))
+        logger.debug(debug_message.format(model_name, sql))

         query.run()
1 change: 1 addition & 0 deletions dbt/contracts/connection.py
@@ -41,6 +41,7 @@
     Required('schema'): basestring,
     Optional('keyfile'): basestring,
     Optional('keyfile_json'): object,
+    Optional('timeout_seconds'): int,
 })

 credentials_mapping = {
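For illustration, a credentials dict that would satisfy this contract; the method and project keys are assumed from the unshown part of the schema, and all values are hypothetical:

    credentials = {
        'method': 'service-account',          # assumed required field
        'project': 'my-gcp-project',          # assumed required field
        'schema': 'analytics',
        'keyfile': '/path/to/keyfile.json',
        'timeout_seconds': 600,               # overrides the 300-second default
    }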
@@ -6,20 +6,31 @@
   {%- set existing = adapter.query_for_existing(schema) -%}
   {%- set existing_type = existing.get(identifier) -%}

   -- setup
   {%- if existing_type is not none -%}
     {{ adapter.drop(identifier, existing_type) }}
   {%- endif -%}

   -- build model
-  {% set result = adapter.execute_model(model) %}
+  {% set result = adapter.execute_model(model, 'view') %}
   {{ store_result('main', status=result) }}

 {%- endmaterialization %}

 {% materialization table, adapter='bigquery' -%}

-  {{ exceptions.materialization_not_available(model, 'bigquery') }}
+  {%- set identifier = model['name'] -%}
+  {%- set tmp_identifier = identifier + '__dbt_tmp' -%}
+  {%- set non_destructive_mode = (flags.NON_DESTRUCTIVE == True) -%}
+  {%- set existing = adapter.query_for_existing(schema) -%}
+  {%- set existing_type = existing.get(identifier) -%}
+
+  {%- if existing_type is not none -%}
+    {{ adapter.drop(identifier, existing_type) }}
+  {%- endif -%}
+
+  -- build model
+  {% set result = adapter.execute_model(model, 'table') %}
+  {{ store_result('main', status=result) }}

 {% endmaterialization %}
2 changes: 1 addition & 1 deletion requirements.txt
@@ -10,5 +10,5 @@ celery==3.1.23
 voluptuous==0.10.5
 snowflake-connector-python==1.4.0
 colorama==0.3.9
-google-cloud-bigquery==0.24.0
+google-cloud-bigquery==0.26.0
 pyasn1==0.2.3
2 changes: 1 addition & 1 deletion setup.py
@@ -42,7 +42,7 @@
         'voluptuous==0.10.5',
         'snowflake-connector-python>=1.3.16',
         'colorama==0.3.9',
-        'google-cloud-bigquery==0.24.0',
+        'google-cloud-bigquery==0.26.0',
         'pyasn1==0.2.3',
     ]
 )