Skip to content

Commit

Permalink
Agate fixes
Browse files Browse the repository at this point in the history
Restrict agate date+datetime formats
remove timedelta
When column types are specified for a seed, parse those columns as Text unconditionally
  • Loading branch information
Jacob Beck committed Nov 20, 2019
1 parent 67d499e commit 0b18212
Show file tree
Hide file tree
Showing 13 changed files with 312 additions and 54 deletions.
62 changes: 50 additions & 12 deletions core/dbt/clients/agate_helper.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,58 @@
from codecs import BOM_UTF8

import agate
import datetime
import isodate
import json
from typing import Iterable


BOM = BOM_UTF8.decode('utf-8') # '\ufeff'

# Module-level agate TypeTester built from a fixed list of candidate types:
# Number, TimeDelta, Date, DateTime, Boolean and Text. Every type treats the
# strings 'null' and '' as null values; Boolean only recognises 'true' and
# 'false' as its true/false values.
# NOTE(review): the same name is assigned again further down in this listing
# (via build_type_tester(())); this looks like the pre-change side of a diff,
# confirm which definition is the live one.
DEFAULT_TYPE_TESTER = agate.TypeTester(types=[
agate.data_types.Number(null_values=('null', '')),
agate.data_types.TimeDelta(null_values=('null', '')),
agate.data_types.Date(null_values=('null', '')),
agate.data_types.DateTime(null_values=('null', '')),
agate.data_types.Boolean(true_values=('true',),
false_values=('false',),
null_values=('null', '')),
agate.data_types.Text(null_values=('null', ''))
])

class ISODateTime(agate.data_types.DateTime):
    """An agate DateTime type that only recognises ISO 8601 strings."""

    def cast(self, d):
        """Convert ``d`` to a ``datetime.datetime`` (or ``None``).

        ``datetime.datetime`` instances and ``None`` are returned unchanged,
        ``datetime.date`` instances are promoted to midnight of that day, and
        strings are stripped, checked against ``self.null_values`` and then
        parsed with ``isodate.parse_datetime``.

        Raises:
            agate.exceptions.CastError: if ``d`` cannot be interpreted as a
                datetime by any of the rules above.
        """
        # this is agate.data_types.DateTime.cast with the "clever" bits removed
        # so we only handle ISO8601 stuff
        if isinstance(d, datetime.datetime) or d is None:
            return d
        elif isinstance(d, datetime.date):
            return datetime.datetime.combine(d, datetime.time(0, 0, 0))
        elif isinstance(d, str):
            d = d.strip()
            if d.lower() in self.null_values:
                return None
        try:
            return isodate.parse_datetime(d)
        except Exception:
            # Any parse failure means "not an ISO datetime"; report it as a
            # CastError below. Catching Exception rather than using a bare
            # except lets KeyboardInterrupt/SystemExit propagate instead of
            # being converted into a cast failure.
            pass

        raise agate.exceptions.CastError(
            'Can not parse value "%s" as datetime.' % d
        )


def build_type_tester(text_columns: Iterable[str]):
    """Create the agate TypeTester used when loading tabular data.

    Every column named in ``text_columns`` is forced to the Text type. All
    other columns are tested against Number, Date (``%Y-%m-%d``), DateTime
    (``%Y-%m-%d %H:%M:%S``), ISODateTime, Boolean (``true``/``false`` only)
    and Text. Each type treats ``'null'`` and the empty string as null.
    """
    nulls = ('null', '')

    candidate_types = [
        agate.data_types.Number(null_values=nulls),
        agate.data_types.Date(null_values=nulls, date_format='%Y-%m-%d'),
        agate.data_types.DateTime(
            null_values=nulls,
            datetime_format='%Y-%m-%d %H:%M:%S',
        ),
        ISODateTime(null_values=nulls),
        agate.data_types.Boolean(
            true_values=('true',),
            false_values=('false',),
            null_values=nulls,
        ),
        agate.data_types.Text(null_values=nulls),
    ]

    # One fresh Text instance per forced column name.
    forced = {}
    for column_name in text_columns:
        forced[column_name] = agate.data_types.Text(null_values=nulls)

    return agate.TypeTester(force=forced, types=candidate_types)


# Type tester with no forced-Text columns (an empty tuple of column names).
DEFAULT_TYPE_TESTER = build_type_tester(())


def table_from_data(data, column_names):
Expand Down Expand Up @@ -61,8 +98,9 @@ def as_matrix(table):
return [r.values() for r in table.rows.values()]


# Load the CSV file at `abspath` into an agate Table.
# NOTE(review): this span shows two `def` lines and two `return` lines; it
# appears to be the old and new sides of a diff interleaved, confirm which
# lines are live before relying on it.
def from_csv(abspath):
def from_csv(abspath, text_columns):
# Build a tester for this call, passing `text_columns` through to
# build_type_tester.
type_tester = build_type_tester(text_columns=text_columns)
with open(abspath, encoding='utf-8') as fp:
# Read one character: if it is the BOM, leave the position just past it;
# otherwise rewind so that first character is not lost.
if fp.read(1) != BOM:
fp.seek(0)
return agate.Table.from_csv(fp, column_types=DEFAULT_TYPE_TESTER)
return agate.Table.from_csv(fp, column_types=type_tester)
9 changes: 5 additions & 4 deletions core/dbt/context/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from typing_extensions import Protocol
from typing import Union, Callable, Any, Dict, TypeVar, Type

import dbt.clients.agate_helper
from dbt.clients import agate_helper
from dbt.contracts.graph.compiled import CompiledSeedNode
from dbt.contracts.graph.parsed import ParsedSeedNode
import dbt.exceptions
Expand Down Expand Up @@ -105,11 +105,11 @@ def add_macros(self, context):
def _store_result(sql_results):
def call(name, status, agate_table=None):
if agate_table is None:
agate_table = dbt.clients.agate_helper.empty_table()
agate_table = agate_helper.empty_table()

sql_results[name] = dbt.utils.AttrDict({
'status': status,
'data': dbt.clients.agate_helper.as_matrix(agate_table),
'data': agate_helper.as_matrix(agate_table),
'table': agate_table
})
return ''
Expand Down Expand Up @@ -185,8 +185,9 @@ def _build_load_agate_table(
) -> Callable[[], agate.Table]:
def load_agate_table():
path = model.seed_file_path
column_types = model.config.column_types
try:
table = dbt.clients.agate_helper.from_csv(path)
table = agate_helper.from_csv(path, text_columns=column_types)
except ValueError as e:
dbt.exceptions.raise_compiler_error(str(e))
table.original_abspath = os.path.abspath(path)
Expand Down
1 change: 1 addition & 0 deletions core/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ def read(fname):
'requests>=2.18.0,<3',
'colorama>=0.3.9,<0.5',
'agate>=1.6,<2',
'isodate>=0.6,<0.7',
'json-rpc>=1.12,<2',
'werkzeug>=0.15,<0.17',
'dataclasses==0.6;python_version<"3.7"',
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
id,id_str,a_bool,looks_like_a_bool,a_date,looks_like_a_date,relative,weekday
1,1,true,true,2019-01-01 12:32:30,2019-01-01 12:32:30,tomorrow,Saturday
2,2,True,True,2019-01-01 12:32:31,2019-01-01 12:32:31,today,Sunday
3,3,TRUE,TRUE,2019-01-01 12:32:32,2019-01-01 12:32:32,yesterday,Monday
4,4,false,false,2019-01-01 01:32:32,2019-01-01 01:32:32,tomorrow,Saturday
5,5,False,False,2019-01-01 01:32:32,2019-01-01 01:32:32,today,Sunday
6,6,FALSE,FALSE,2019-01-01 01:32:32,2019-01-01 01:32:32,yesterday,Monday
4 changes: 4 additions & 0 deletions test/integration/005_simple_seed_test/macros/schema_test.sql
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@
{% endfor %}

{% set val = 0 if col_types.get(column_name) == type else 1 %}
{% if val == 1 and execute %}
{# I'm so tired of guessing what's wrong, let's just log it #}
{{ log('Got a column type of ' ~ col_types.get(column_name) ~ ', expected ' ~ type, info=True) }}
{% endif %}

select {{ val }} as pass_fail

Expand Down
35 changes: 35 additions & 0 deletions test/integration/005_simple_seed_test/models-bq/schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,38 @@ models:
tests:
- column_type:
type: FLOAT64

- name: seed_tricky
columns:
- name: id
tests:
- column_type:
type: INT64
- name: id_str
tests:
- column_type:
type: STRING
- name: a_bool
tests:
- column_type:
type: BOOLEAN
- name: looks_like_a_bool
tests:
- column_type:
type: STRING
- name: a_date
tests:
- column_type:
type: DATETIME
- name: looks_like_a_date
tests:
- column_type:
type: STRING
- name: relative
tests:
- column_type:
type: STRING
- name: weekday
tests:
- column_type:
type: STRING
35 changes: 35 additions & 0 deletions test/integration/005_simple_seed_test/models-pg/schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,38 @@ models:
tests:
- column_type:
type: text

- name: seed_tricky
columns:
- name: id
tests:
- column_type:
type: integer
- name: id_str
tests:
- column_type:
type: text
- name: a_bool
tests:
- column_type:
type: boolean
- name: looks_like_a_bool
tests:
- column_type:
type: text
- name: a_date
tests:
- column_type:
type: timestamp without time zone
- name: looks_like_a_date
tests:
- column_type:
type: text
- name: relative
tests:
- column_type:
type: text
- name: weekday
tests:
- column_type:
type: text
35 changes: 35 additions & 0 deletions test/integration/005_simple_seed_test/models-rs/schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,38 @@ models:
tests:
- column_type:
type: character varying(256)

- name: seed_tricky
columns:
- name: id
tests:
- column_type:
type: integer
- name: id_str
tests:
- column_type:
type: character varying(256)
- name: a_bool
tests:
- column_type:
type: boolean
- name: looks_like_a_bool
tests:
- column_type:
type: character varying(256)
- name: a_date
tests:
- column_type:
type: timestamp without time zone
- name: looks_like_a_date
tests:
- column_type:
type: character varying(256)
- name: relative
tests:
- column_type:
type: character varying(9)
- name: weekday
tests:
- column_type:
type: character varying(8)
39 changes: 37 additions & 2 deletions test/integration/005_simple_seed_test/models-snowflake/schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,46 @@ version: 2
models:
- name: seed_enabled
columns:
- name: BIRTHDAY
- name: birthday
tests:
- column_type:
type: character varying(16777216)
- name: ID
- name: id
tests:
- column_type:
type: FLOAT

- name: seed_tricky
columns:
- name: id
tests:
- column_type:
type: NUMBER(38,0)
- name: id_str
tests:
- column_type:
type: character varying(16777216)
- name: a_bool
tests:
- column_type:
type: BOOLEAN
- name: looks_like_a_bool
tests:
- column_type:
type: character varying(16777216)
- name: a_date
tests:
- column_type:
type: TIMESTAMP_NTZ
- name: looks_like_a_date
tests:
- column_type:
type: character varying(16777216)
- name: relative
tests:
- column_type:
type: character varying(16777216)
- name: weekday
tests:
- column_type:
type: character varying(16777216)
Loading

0 comments on commit 0b18212

Please sign in to comment.