Skip to content

Commit

Permalink
Merge pull request #2402 from fishtown-analytics/feature/bigquery-col…
Browse files Browse the repository at this point in the history
…umn-comments

Feature/bigquery column comments
  • Loading branch information
drewbanin authored May 7, 2020
2 parents 9cd7cbc + 7122a3f commit 784ab79
Show file tree
Hide file tree
Showing 21 changed files with 197 additions and 150 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
## dbt 0.17.0 (Release TBD)

### Features
- Suport column descriptions for BigQuery models ([#2335](https://github.com/fishtown-analytics/dbt/issues/2335), [#2402](https://github.com/fishtown-analytics/dbt/pull/2402))

### Fixes
- When tracking is disabled due to errors, do not reset the invocation ID ([#2398](https://github.com/fishtown-analytics/dbt/issues/2398), [#2400](https://github.com/fishtown-analytics/dbt/pull/2400))
- Fix for logic error in compilation errors for duplicate data test names ([#2406](https://github.com/fishtown-analytics/dbt/issues/2406), [#2407](https://github.com/fishtown-analytics/dbt/pull/2407))
Expand Down
24 changes: 24 additions & 0 deletions core/dbt/context/providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,12 @@ def require(self, name, validator=None):
def get(self, name, validator=None, default=None):
return ''

def persist_relation_docs(self) -> bool:
return False

def persist_column_docs(self) -> bool:
return False


class RuntimeConfigObject(Config):
def __init__(
Expand Down Expand Up @@ -249,6 +255,24 @@ def get(self, name, validator=None, default=None):

return to_return

def persist_relation_docs(self) -> bool:
persist_docs = self.get('persist_docs', default={})
if not isinstance(persist_docs, dict):
raise_compiler_error(
f"Invalid value provided for 'persist_docs'. Expected dict "
f"but received {type(persist_docs)}")

return persist_docs.get('relation', False)

def persist_column_docs(self) -> bool:
persist_docs = self.get('persist_docs', default={})
if not isinstance(persist_docs, dict):
raise_compiler_error(
f"Invalid value provided for 'persist_docs'. Expected dict "
f"but received {type(persist_docs)}")

return persist_docs.get('columns', False)


# `adapter` implementations
class ParseDatabaseWrapper(BaseDatabaseWrapper):
Expand Down
32 changes: 13 additions & 19 deletions core/dbt/include/global_project/macros/adapters/common.sql
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,19 @@
'alter_relation_comment macro not implemented for adapter '+adapter.type()) }}
{% endmacro %}

{% macro persist_docs(relation, model, for_relation=true, for_columns=true) -%}
{{ return(adapter_macro('persist_docs', relation, model, for_relation, for_columns)) }}
{% endmacro %}

{% macro default__persist_docs(relation, model, for_relation, for_columns) -%}
{% if for_relation and config.persist_relation_docs() %}
{% do run_query(alter_relation_comment(relation, model.description)) %}
{% endif %}

{% if for_columns and config.persist_column_docs() %}
{% do run_query(alter_column_comment(relation, model.columns)) %}
{% endif %}
{% endmacro %}



Expand Down Expand Up @@ -304,22 +317,3 @@
{% macro set_sql_header(config) -%}
{{ config.set('sql_header', caller()) }}
{%- endmacro %}


{%- macro set_relation_comment(relation) -%}
{%- set raw_persist_docs = config.get('persist_docs', {}) -%}
{%- set comment = get_relation_comment(raw_persist_docs, model) -%}
{%- if comment is not none -%}
{{ alter_relation_comment(relation, comment) }}
{%- endif -%}
{%- endmacro -%}


{%- macro set_column_comments(relation) -%}
{%- set raw_persist_docs = config.get('persist_docs', {}) -%}
{%- set column_dict = get_relation_column_comments(raw_persist_docs, model) -%}
{%- if column_dict is not none -%}
{{ alter_column_comment(relation, column_dict) }}
{%- endif -%}
{%- endmacro -%}

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@
{{ build_sql }}
{% endcall %}

{% do persist_docs(target_relation, model) %}

{{ run_hooks(post_hooks, inside_transaction=True) }}

-- `COMMIT` happens here
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -139,14 +139,16 @@
{{ sql }}
{% endcall %}

{% set target_relation = this.incorporate(type='table') %}
{% do persist_docs(target_relation, model) %}

{{ run_hooks(post_hooks, inside_transaction=True) }}

-- `COMMIT` happens here
{{ adapter.commit() }}

{{ run_hooks(post_hooks, inside_transaction=False) }}

{% set target_relation = this.incorporate(type='table') %}
{{ return({'relations': [target_relation]}) }}

{% endmaterialization %}
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,8 @@
{{ final_sql }}
{% endcall %}

{% do persist_docs(target_relation, model) %}

{{ run_hooks(post_hooks, inside_transaction=True) }}

{{ adapter.commit() }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@

{{ run_hooks(post_hooks, inside_transaction=True) }}

{% do persist_docs(target_relation, model) %}

-- `COMMIT` happens here
{{ adapter.commit() }}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@
{% endif %}
{{ adapter.rename_relation(intermediate_relation, target_relation) }}

{% do persist_docs(target_relation, model) %}

{{ run_hooks(post_hooks, inside_transaction=True) }}

{{ adapter.commit() }}
Expand Down
67 changes: 63 additions & 4 deletions plugins/bigquery/dbt/adapters/bigquery/impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,20 @@
import google.cloud.exceptions
import google.cloud.bigquery

from google.cloud.bigquery import SchemaField

import time
import agate
import json


def sql_escape(string):
if not isinstance(string, str):
dbt.exceptions.raise_compiler_exception(
f'cannot escape a non-string: {string}'
)

return json.dumps(string)[1:-1]


@dataclass
Expand Down Expand Up @@ -320,16 +332,14 @@ def _get_dbt_columns_from_bq_table(self, table) -> List[BigQueryColumn]:

def _agate_to_schema(
self, agate_table: agate.Table, column_override: Dict[str, str]
) -> List[google.cloud.bigquery.SchemaField]:
) -> List[SchemaField]:
"""Convert agate.Table with column names to a list of bigquery schemas.
"""
bq_schema = []
for idx, col_name in enumerate(agate_table.column_names):
inferred_type = self.convert_agate_type(agate_table, idx)
type_ = column_override.get(col_name, inferred_type)
bq_schema.append(
google.cloud.bigquery.SchemaField(col_name, type_)
)
bq_schema.append(SchemaField(col_name, type_))
return bq_schema

def _materialize_as_view(self, model: Dict[str, Any]) -> str:
Expand Down Expand Up @@ -545,6 +555,33 @@ def parse_partition_by(
"""
return PartitionConfig.parse(raw_partition_by)

def get_table_ref_from_relation(self, conn, relation):
return self.connections.table_ref(relation.database,
relation.schema,
relation.identifier,
conn)

@available.parse_none
def update_column_descriptions(self, relation, columns):
if len(columns) == 0:
return

conn = self.connections.get_thread_connection()
table_ref = self.get_table_ref_from_relation(conn, relation)
table = conn.handle.get_table(table_ref)

new_schema = []
for column in table.schema:
if column.name in columns:
column_config = columns[column.name]
column_dict = column.to_api_repr()
column_dict['description'] = column_config.get('description')
column = SchemaField.from_api_repr(column_dict)
new_schema.append(column)

new_table = google.cloud.bigquery.Table(table_ref, schema=new_schema)
conn.handle.update_table(new_table, ['schema'])

@available.parse_none
def alter_table_add_columns(self, relation, columns):

Expand Down Expand Up @@ -614,3 +651,25 @@ def _get_cache_schemas(
.format(database, candidate.schema)
)
return result

@available.parse(lambda *a, **k: {})
def get_table_options(
self, config: Dict[str, Any], node: Dict[str, Any], temporary: bool
) -> Dict[str, Any]:
opts = {}
if temporary:
expiration = 'TIMESTAMP_ADD(CURRENT_TIMESTAMP(), INTERVAL 12 hour)'
opts['expiration_timestamp'] = expiration

if config.persist_relation_docs() and 'description' in node:
description = sql_escape(node['description'])
opts['description'] = '"""{}"""'.format(description)

if config.get('kms_key_name') is not None:
opts['kms_key_name'] = "'{}'".format(config.get('kms_key_name'))

if config.get('labels'):
labels = config.get('labels', {})
opts['labels'] = list(labels.items())

return opts
55 changes: 15 additions & 40 deletions plugins/bigquery/dbt/include/bigquery/macros/adapters.sql
Original file line number Diff line number Diff line change
Expand Up @@ -28,34 +28,8 @@
{%- endmacro -%}


{%- macro bigquery_escape_comment(comment) -%}
{%- if comment is not string -%}
{%- do exceptions.raise_compiler_exception('cannot escape a non-string: ' ~ comment) -%}
{%- endif -%}
{%- do return((comment | tojson)[1:-1]) -%}
{%- endmacro -%}


{% macro bigquery_table_options(persist_docs, temporary, kms_key_name, labels) %}
{% set opts = {} -%}

{%- set description = get_relation_comment(persist_docs, model) -%}
{%- if description is not none -%}
{%- do opts.update({'description': "'" ~ bigquery_escape_comment(description) ~ "'"}) -%}
{%- endif -%}
{%- if temporary -%}
{% do opts.update({'expiration_timestamp': 'TIMESTAMP_ADD(CURRENT_TIMESTAMP(), INTERVAL 12 hour)'}) %}
{%- endif -%}
{%- if kms_key_name -%}
{%- do opts.update({'kms_key_name': "'" ~ kms_key_name ~ "'"}) -%}
{%- endif -%}
{%- if labels -%}
{%- set label_list = [] -%}
{%- for label, value in labels.items() -%}
{%- do label_list.append((label, value)) -%}
{%- endfor -%}
{%- do opts.update({'labels': label_list}) -%}
{%- endif -%}
{% macro bigquery_table_options(config, node, temporary) %}
{% set opts = adapter.get_table_options(config, node, temporary) %}

{% set options -%}
OPTIONS({% for opt_key, opt_val in opts.items() %}
Expand All @@ -68,9 +42,6 @@
{% macro bigquery__create_table_as(temporary, relation, sql) -%}
{%- set raw_partition_by = config.get('partition_by', none) -%}
{%- set raw_cluster_by = config.get('cluster_by', none) -%}
{%- set raw_persist_docs = config.get('persist_docs', {}) -%}
{%- set raw_kms_key_name = config.get('kms_key_name', none) -%}
{%- set raw_labels = config.get('labels', {}) -%}
{%- set sql_header = config.get('sql_header', none) -%}

{%- set partition_config = adapter.parse_partition_by(raw_partition_by) -%}
Expand All @@ -80,27 +51,20 @@
create or replace table {{ relation }}
{{ partition_by(partition_config) }}
{{ cluster_by(raw_cluster_by) }}
{{ bigquery_table_options(
persist_docs=raw_persist_docs,
temporary=temporary,
kms_key_name=raw_kms_key_name,
labels=raw_labels
) }}
{{ bigquery_table_options(config, model, temporary) }}
as (
{{ sql }}
);

{%- endmacro -%}

{% macro bigquery__create_view_as(relation, sql) -%}
{%- set raw_persist_docs = config.get('persist_docs', {}) -%}
{%- set raw_labels = config.get('labels', []) -%}
{%- set sql_header = config.get('sql_header', none) -%}

{{ sql_header if sql_header is not none }}

create or replace view {{ relation }}
{{ bigquery_table_options(persist_docs=raw_persist_docs, temporary=false, labels=raw_labels) }}
{{ bigquery_table_options(config, model, temporary=false) }}
as (
{{ sql }}
);
Expand Down Expand Up @@ -149,3 +113,14 @@
{% macro bigquery__check_schema_exists(information_schema, schema) %}
{{ return(adapter.check_schema_exists(information_schema.database, schema)) }}
{% endmacro %}

{#-- relation-level macro is not implemented. This is handled in the CTAs statement #}
{% macro bigquery__persist_docs(relation, model, for_relation, for_columns) -%}
{% if for_columns and config.persist_column_docs() %}
{% do alter_column_comment(relation, model.columns) %}
{% endif %}
{% endmacro %}

{% macro bigquery__alter_column_comment(relation, column_dict) -%}
{% do adapter.update_column_descriptions(relation, column_dict) %}
{% endmacro %}
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,10 @@

{{ run_hooks(post_hooks) }}

{% set target_relation = this.incorporate(type='table') %}

{% do persist_docs(target_relation, model) %}

{{ return({'relations': [target_relation]}) }}

{%- endmaterialization %}
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@

{{ run_hooks(post_hooks) }}

{% do persist_docs(target_relation, model) %}

{{ return({'relations': [target_relation]}) }}

{% endmaterialization %}
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,11 @@


{% materialization view, adapter='bigquery' -%}
{{ return(create_or_replace_view(run_outside_transaction_hooks=False)) }}
{% set to_return = create_or_replace_view(run_outside_transaction_hooks=False) %}

{% set target_relation = this.incorporate(type='view') %}
{% do persist_docs(target_relation, model) %}

{% do return(to_return) %}

{%- endmaterialization %}
Loading

0 comments on commit 784ab79

Please sign in to comment.