From 107bc5c2170472e2a619ad17bd408f881483ac56 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Tue, 14 Apr 2020 13:23:12 -0600 Subject: [PATCH 1/2] Use "describe table" to get the columns in a relation on snowflake --- CHANGELOG.md | 1 + core/dbt/adapters/base/column.py | 47 ++++++++++++++++++- .../dbt/include/snowflake/macros/adapters.sql | 40 +++++++--------- 3 files changed, 64 insertions(+), 24 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6d41d179294..82d45396d08 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ - Added support for `db_groups` and `autocreate` flags in Redshift configurations. ([#1995](https://github.com/fishtown-analytics/dbt/issues/1995), [#2262](https://github.com/fishtown-analytics/dbt/pull/2262)) - Users can supply paths as arguments to `--models` and `--select`, either explicitily by prefixing with `path:` or implicitly with no prefix. ([#454](https://github.com/fishtown-analytics/dbt/issues/454), [#2258](https://github.com/fishtown-analytics/dbt/pull/2258)) - dbt now builds the relation cache for "dbt compile" and "dbt ls" as well as "dbt run" ([#1705](https://github.com/fishtown-analytics/dbt/issues/1705), [#2319](https://github.com/fishtown-analytics/dbt/pull/2319)) +- Snowflake now uses "describe table" to get the columns in a relation ([#2260](https://github.com/fishtown-analytics/dbt/issues/2260), [#2324](https://github.com/fishtown-analytics/dbt/pull/2324)) ### Fixes - When a jinja value is undefined, give a helpful error instead of failing with cryptic "cannot pickle ParserMacroCapture" errors ([#2110](https://github.com/fishtown-analytics/dbt/issues/2110), [#2184](https://github.com/fishtown-analytics/dbt/pull/2184)) diff --git a/core/dbt/adapters/base/column.py b/core/dbt/adapters/base/column.py index 75b41e0b3f6..24bb7599952 100644 --- a/core/dbt/adapters/base/column.py +++ b/core/dbt/adapters/base/column.py @@ -1,6 +1,8 @@ from dataclasses import dataclass +import re from hologram import 
JsonSchemaMixin +from dbt.exceptions import RuntimeException from typing import Dict, ClassVar, Any, Optional @@ -74,7 +76,7 @@ def is_numeric(self) -> bool: def string_size(self) -> int: if not self.is_string(): - raise RuntimeError("Called string_size() on non-string field!") + raise RuntimeException("Called string_size() on non-string field!") if self.dtype == 'text' or self.char_size is None: # char_size should never be None. Handle it reasonably just in case @@ -108,3 +110,46 @@ def numeric_type(cls, dtype: str, precision: Any, scale: Any) -> str: def __repr__(self) -> str: return "<Column {} ({})>".format(self.name, self.data_type) + + @classmethod + def from_description(cls, name: str, raw_data_type: str) -> 'Column': + match = re.match(r'([^(]+)(\([^)]+\))?', raw_data_type) + if match is None: + raise RuntimeException( + f'Could not interpret data type "{raw_data_type}"' + ) + data_type, size_info = match.groups() + char_size = None + numeric_precision = None + numeric_scale = None + if size_info is not None: + # strip out the parentheses + size_info = size_info[1:-1] + parts = size_info.split(',') + if len(parts) == 1: + try: + char_size = int(parts[0]) + except ValueError: + raise RuntimeException( + f'Could not interpret data_type "{raw_data_type}": ' + f'could not convert "{parts[0]}" to an integer' + ) + elif len(parts) == 2: + try: + numeric_precision = int(parts[0]) + except ValueError: + raise RuntimeException( + f'Could not interpret data_type "{raw_data_type}": ' + f'could not convert "{parts[0]}" to an integer' + ) + try: + numeric_scale = int(parts[1]) + except ValueError: + raise RuntimeException( + f'Could not interpret data_type "{raw_data_type}": ' + f'could not convert "{parts[1]}" to an integer' + ) + + return cls( + name, data_type, char_size, numeric_precision, numeric_scale + ) diff --git a/plugins/snowflake/dbt/include/snowflake/macros/adapters.sql b/plugins/snowflake/dbt/include/snowflake/macros/adapters.sql index 9d051c4e39c..243c948111e 100644 --- 
a/plugins/snowflake/dbt/include/snowflake/macros/adapters.sql +++ b/plugins/snowflake/dbt/include/snowflake/macros/adapters.sql @@ -52,31 +52,25 @@ {% endmacro %} {% macro snowflake__get_columns_in_relation(relation) -%} - {% call statement('get_columns_in_relation', fetch_result=True) %} - select - column_name, - data_type, - character_maximum_length, - numeric_precision, - numeric_scale - - from - {{ relation.information_schema('columns') }} - - where table_name ilike '{{ relation.identifier }}' - {% if relation.schema %} - and table_schema ilike '{{ relation.schema }}' - {% endif %} - {% if relation.database %} - and table_catalog ilike '{{ relation.database }}' - {% endif %} - order by ordinal_position + {%- set sql -%} + describe table {{ relation }} + {%- endset -%} + {%- set result = run_query(sql) -%} - {% endcall %} - - {% set table = load_result('get_columns_in_relation').table %} - {{ return(sql_convert_columns_in_relation(table)) }} + {% set maximum = 10000 %} + {% if (result | length) >= maximum %} + {% set msg %} + Too many columns in relation {{ relation }}! dbt can only get + information about relations with fewer than {{ maximum }} columns. 
+ {% endset %} + {% do exceptions.raise_compiler_error(msg) %} + {% endif %} + {% set columns = [] %} + {% for row in result %} + {% do columns.append(api.Column.from_description(row['name'], row['type'])) %} + {% endfor %} + {% do return(columns) %} {% endmacro %} {% macro snowflake__list_schemas(database) -%} From bc38750d47d51e5d0af952624042c39481411d40 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Wed, 15 Apr 2020 10:11:30 -0600 Subject: [PATCH 2/2] PR feedback, tests --- .../dbt/adapters/snowflake/column.py | 10 +++ test/unit/test_snowflake_adapter.py | 70 +++++++++++++++++++ 2 files changed, 80 insertions(+) diff --git a/plugins/snowflake/dbt/adapters/snowflake/column.py b/plugins/snowflake/dbt/adapters/snowflake/column.py index caa7cbdc3d5..d7afb307fac 100644 --- a/plugins/snowflake/dbt/adapters/snowflake/column.py +++ b/plugins/snowflake/dbt/adapters/snowflake/column.py @@ -1,6 +1,7 @@ from dataclasses import dataclass from dbt.adapters.base.column import Column +from dbt.exceptions import RuntimeException @dataclass @@ -19,3 +20,12 @@ def is_float(self): return self.dtype.lower() in [ 'float', 'float4', 'float8', 'double', 'double precision', 'real', ] + + def string_size(self) -> int: + if not self.is_string(): + raise RuntimeException("Called string_size() on non-string field!") + + if self.dtype == 'text' or self.char_size is None: + return 16777216 + else: + return int(self.char_size) diff --git a/test/unit/test_snowflake_adapter.py b/test/unit/test_snowflake_adapter.py index b2ffbd6c7eb..79c5bfe0d4d 100644 --- a/test/unit/test_snowflake_adapter.py +++ b/test/unit/test_snowflake_adapter.py @@ -6,6 +6,7 @@ import dbt.flags as flags from dbt.adapters.snowflake import SnowflakeAdapter +from dbt.adapters.snowflake.column import SnowflakeColumn from dbt.adapters.base.query_headers import MacroQueryStringSetter from dbt.clients import agate_helper from dbt.logger import GLOBAL_LOGGER as logger # noqa @@ -449,3 +450,72 @@ def test_convert_time_type(self): 
expected = ['time', 'time', 'time'] for col_idx, expect in enumerate(expected): assert SnowflakeAdapter.convert_time_type(agate_table, col_idx) == expect + + +class TestSnowflakeColumn(unittest.TestCase): + def test_text_from_description(self): + col = SnowflakeColumn.from_description('my_col', 'TEXT') + assert col.column == 'my_col' + assert col.dtype == 'TEXT' + assert col.char_size is None + assert col.numeric_precision is None + assert col.numeric_scale is None + assert col.is_float() is False + assert col.is_number() is False + assert col.is_numeric() is False + assert col.is_string() is True + assert col.is_integer() is False + assert col.string_size() == 16777216 + + col = SnowflakeColumn.from_description('my_col', 'VARCHAR') + assert col.column == 'my_col' + assert col.dtype == 'VARCHAR' + assert col.char_size is None + assert col.numeric_precision is None + assert col.numeric_scale is None + assert col.is_float() is False + assert col.is_number() is False + assert col.is_numeric() is False + assert col.is_string() is True + assert col.is_integer() is False + assert col.string_size() == 16777216 + + def test_sized_varchar_from_description(self): + col = SnowflakeColumn.from_description('my_col', 'VARCHAR(256)') + assert col.column == 'my_col' + assert col.dtype == 'VARCHAR' + assert col.char_size == 256 + assert col.numeric_precision is None + assert col.numeric_scale is None + assert col.is_float() is False + assert col.is_number() is False + assert col.is_numeric() is False + assert col.is_string() is True + assert col.is_integer() is False + assert col.string_size() == 256 + + def test_sized_decimal_from_description(self): + col = SnowflakeColumn.from_description('my_col', 'DECIMAL(1, 0)') + assert col.column == 'my_col' + assert col.dtype == 'DECIMAL' + assert col.char_size is None + assert col.numeric_precision == 1 + assert col.numeric_scale == 0 + assert col.is_float() is False + assert col.is_number() is True + assert col.is_numeric() is True + 
assert col.is_string() is False + assert col.is_integer() is False + + def test_float_from_description(self): + col = SnowflakeColumn.from_description('my_col', 'FLOAT8') + assert col.column == 'my_col' + assert col.dtype == 'FLOAT8' + assert col.char_size is None + assert col.numeric_precision is None + assert col.numeric_scale is None + assert col.is_float() is True + assert col.is_number() is True + assert col.is_numeric() is False + assert col.is_string() is False + assert col.is_integer() is False