Skip to content

Commit

Permalink
Support configurable delimiter for seed files, default to comma (dbt-…
Browse files: browse the repository at this point in the history
  • Loading branch information
ramonvermeulen committed Mar 17, 2023
1 parent 4186f99 commit 596856b
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 5 deletions.
4 changes: 2 additions & 2 deletions core/dbt/clients/agate_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,12 +135,12 @@ def as_matrix(table):
return [r.values() for r in table.rows.values()]


def from_csv(abspath, text_columns):
def from_csv(abspath, text_columns, delimiter = ","):
type_tester = build_type_tester(text_columns=text_columns)
with open(abspath, encoding="utf-8") as fp:
if fp.read(1) != BOM:
fp.seek(0)
return agate.Table.from_csv(fp, column_types=type_tester)
return agate.Table.from_csv(fp, column_types=type_tester, delimiter=delimiter)


class _NullMarker:
Expand Down
3 changes: 2 additions & 1 deletion core/dbt/context/providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -791,8 +791,9 @@ def load_agate_table(self) -> agate.Table:
assert self.model.root_path
path = os.path.join(self.model.root_path, self.model.original_file_path)
column_types = self.model.config.column_types
delimiter = self.model.config.delimiter
try:
table = agate_helper.from_csv(path, text_columns=column_types)
table = agate_helper.from_csv(path, text_columns=column_types, delimiter=delimiter)
except ValueError as e:
raise LoadAgateTableValueError(e, node=self.model)
table.original_abspath = os.path.abspath(path)
Expand Down
1 change: 1 addition & 0 deletions core/dbt/contracts/graph/model_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,7 @@ class NodeConfig(NodeAndTestConfig):
default_factory=dict,
metadata=MergeBehavior.Update.meta(),
)
delimiter: str = ","
full_refresh: Optional[bool] = None
# 'unique_key' doesn't use 'Optional' because typing.get_type_hints was
# sometimes getting the Union order wrong, causing serialization failures.
Expand Down
2 changes: 2 additions & 0 deletions test/unit/test_contracts_graph_compiled.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ def basic_uncompiled_dict():
'tags': [],
'config': {
'column_types': {},
'delimiter': ',',
'enabled': True,
'materialized': 'view',
'persist_docs': {},
Expand Down Expand Up @@ -182,6 +183,7 @@ def basic_compiled_dict():
'tags': [],
'config': {
'column_types': {},
'delimiter': ',',
'enabled': True,
'materialized': 'view',
'persist_docs': {},
Expand Down
26 changes: 24 additions & 2 deletions test/unit/test_contracts_graph_parsed.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ def populated_node_config_object():
def populated_node_config_dict():
return {
'column_types': {'a': 'text'},
'delimiter': ',',
'enabled': True,
'materialized': 'table',
'persist_docs': {},
Expand All @@ -91,6 +92,7 @@ def test_config_populated(populated_node_config_object, populated_node_config_di
def unrendered_node_config_dict():
return {
'column_types': {'a': 'text'},
'delimiter': ',',
'materialized': 'table',
'post_hook': 'insert into blah(a, b) select "1", 1',
}
Expand Down Expand Up @@ -150,6 +152,7 @@ def base_parsed_model_dict():
'tags': [],
'config': {
'column_types': {},
'delimiter': ',',
'enabled': True,
'materialized': 'view',
'persist_docs': {},
Expand Down Expand Up @@ -251,6 +254,7 @@ def complex_parsed_model_dict():
'meta': {},
'config': {
'column_types': {'a': 'text'},
'delimiter': ',',
'enabled': True,
'materialized': 'ephemeral',
'persist_docs': {},
Expand Down Expand Up @@ -278,6 +282,7 @@ def complex_parsed_model_dict():
'checksum': {'name': 'sha256', 'checksum': 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855'},
'unrendered_config': {
'column_types': {'a': 'text'},
'delimiter': ',',
'materialized': 'ephemeral',
'post_hook': ['insert into blah(a, b) select "1", 1'],
},
Expand Down Expand Up @@ -311,22 +316,24 @@ def complex_parsed_model_object():
meta={},
config=NodeConfig(
column_types={'a': 'text'},
delimiter=',',
materialized='ephemeral',
post_hook=[Hook(sql='insert into blah(a, b) select "1", 1')],
),
columns={'a': ColumnInfo('a', 'a text field', {})},
checksum=FileHash.from_contents(''),
unrendered_config={
'column_types': {'a': 'text'},
'delimiter': ',',
'materialized': 'ephemeral',
'post_hook': ['insert into blah(a, b) select "1", 1'],
},
)


{'enabled': True, 'tags': [], 'meta': {}, 'materialized': 'ephemeral', 'persist_docs': {}, 'quoting': {}, 'column_types': {'a': 'text'}, 'on_schema_change': 'ignore', 'grants': {}, 'packages': [], 'docs': {'show': True}, 'contract': False, 'post-hook': [{'sql': 'insert into blah(a, b) select "1", 1', 'transaction': True}], 'pre-hook': []}
{'enabled': True, 'tags': [], 'meta': {}, 'materialized': 'ephemeral', 'persist_docs': {}, 'quoting': {}, 'column_types': {'a': 'text'}, 'delimiter': ',', 'on_schema_change': 'ignore', 'grants': {}, 'packages': [], 'docs': {'show': True}, 'contract': False, 'post-hook': [{'sql': 'insert into blah(a, b) select "1", 1', 'transaction': True}], 'pre-hook': []}

{'column_types': {'a': 'text'}, 'enabled': True, 'materialized': 'ephemeral', 'persist_docs': {}, 'post-hook': [{'sql': 'insert into blah(a, b) select "1", 1', 'transaction': True}], 'pre-hook': [], 'quoting': {}, 'tags': [], 'on_schema_change': 'ignore', 'meta': {}, 'grants': {}, 'docs': {'show': True}, 'packages': []}
{'column_types': {'a': 'text'}, 'delimiter': ',', 'enabled': True, 'materialized': 'ephemeral', 'persist_docs': {}, 'post-hook': [{'sql': 'insert into blah(a, b) select "1", 1', 'transaction': True}], 'pre-hook': [], 'quoting': {}, 'tags': [], 'on_schema_change': 'ignore', 'meta': {}, 'grants': {}, 'docs': {'show': True}, 'packages': []}

def test_model_basic(basic_parsed_model_object, base_parsed_model_dict, minimal_parsed_model_dict):
node = basic_parsed_model_object
Expand Down Expand Up @@ -452,6 +459,7 @@ def basic_parsed_seed_dict():
'alias': 'foo',
'config': {
'column_types': {},
'delimiter': ',',
'enabled': True,
'materialized': 'seed',
'persist_docs': {},
Expand Down Expand Up @@ -542,6 +550,7 @@ def complex_parsed_seed_dict():
'alias': 'foo',
'config': {
'column_types': {},
'delimiter': ',',
'enabled': True,
'materialized': 'seed',
'persist_docs': {'relation': True, 'columns': True},
Expand Down Expand Up @@ -803,6 +812,7 @@ def base_parsed_hook_dict():
'tags': [],
'config': {
'column_types': {},
'delimiter': ',',
'enabled': True,
'materialized': 'view',
'persist_docs': {},
Expand Down Expand Up @@ -882,6 +892,7 @@ def complex_parsed_hook_dict():
'meta': {},
'config': {
'column_types': {'a': 'text'},
'delimiter': ',',
'enabled': True,
'materialized': 'table',
'persist_docs': {},
Expand Down Expand Up @@ -910,6 +921,7 @@ def complex_parsed_hook_dict():
'checksum': {'name': 'sha256', 'checksum': 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855'},
'unrendered_config': {
'column_types': {'a': 'text'},
'delimiter': ',',
'materialized': 'table',
},
'config_call_dict': {},
Expand Down Expand Up @@ -941,6 +953,7 @@ def complex_parsed_hook_object():
meta={},
config=NodeConfig(
column_types={'a': 'text'},
delimiter=',',
materialized='table',
post_hook=[]
),
Expand All @@ -949,6 +962,7 @@ def complex_parsed_hook_object():
checksum=FileHash.from_contents(''),
unrendered_config={
'column_types': {'a': 'text'},
'delimiter': ',',
'materialized': 'table',
},
)
Expand Down Expand Up @@ -1226,6 +1240,7 @@ def test_invalid_severity(complex_parsed_schema_test_dict):
def basic_timestamp_snapshot_config_dict():
return {
'column_types': {},
'delimiter': ',',
'enabled': True,
'materialized': 'snapshot',
'persist_docs': {},
Expand Down Expand Up @@ -1262,6 +1277,7 @@ def basic_timestamp_snapshot_config_object():
def complex_timestamp_snapshot_config_dict():
return {
'column_types': {'a': 'text'},
'delimiter': ',',
'enabled': True,
'materialized': 'snapshot',
'persist_docs': {},
Expand All @@ -1288,6 +1304,7 @@ def complex_timestamp_snapshot_config_dict():
def complex_timestamp_snapshot_config_object():
cfg = SnapshotConfig(
column_types={'a': 'text'},
delimiter=',',
materialized='snapshot',
post_hook=[Hook(sql='insert into blah(a, b) select "1", 1')],
strategy='timestamp',
Expand Down Expand Up @@ -1324,6 +1341,7 @@ def test_invalid_missing_updated_at(basic_timestamp_snapshot_config_dict):
def basic_check_snapshot_config_dict():
return {
'column_types': {},
'delimiter': ',',
'enabled': True,
'materialized': 'snapshot',
'persist_docs': {},
Expand Down Expand Up @@ -1360,6 +1378,7 @@ def basic_check_snapshot_config_object():
def complex_set_snapshot_config_dict():
return {
'column_types': {'a': 'text'},
'delimiter': ',',
'enabled': True,
'materialized': 'snapshot',
'persist_docs': {},
Expand All @@ -1386,6 +1405,7 @@ def complex_set_snapshot_config_dict():
def complex_set_snapshot_config_object():
cfg = SnapshotConfig(
column_types={'a': 'text'},
delimiter=',',
materialized='snapshot',
post_hook=[Hook(sql='insert into blah(a, b) select "1", 1')],
strategy='check',
Expand Down Expand Up @@ -1472,6 +1492,7 @@ def basic_timestamp_snapshot_dict():
'tags': [],
'config': {
'column_types': {},
'delimiter': ',',
'enabled': True,
'materialized': 'snapshot',
'persist_docs': {},
Expand Down Expand Up @@ -1614,6 +1635,7 @@ def basic_check_snapshot_dict():
'tags': [],
'config': {
'column_types': {},
'delimiter': ',',
'enabled': True,
'materialized': 'snapshot',
'persist_docs': {},
Expand Down

0 comments on commit 596856b

Please sign in to comment.