Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CT-1959]: moving simple_seed tests to adapter zone #6859

Merged
merged 9 commits into from
Feb 7, 2023
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .changes/unreleased/Under the Hood-20230203-143551.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Changelog entry for this change; Author and Issue belong under the `custom` mapping.
kind: Under the Hood
body: Moving simple_seed to adapter zone to help adapter test conversions
time: 2023-02-03T14:35:51.481856-08:00
custom:
  Author: nssalian
  Issue: CT-1959
18 changes: 18 additions & 0 deletions core/dbt/tests/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
# rm_file
# write_file
# read_file
# mkdir
# rm_dir
# get_artifact
# update_config_file
# write_config_file
Expand Down Expand Up @@ -156,6 +158,22 @@ def read_file(*paths):
return contents


# To create a directory
def mkdir(directory_path):
    """Create ``directory_path``, including any missing parent directories.

    Args:
        directory_path: Path of the directory to create.

    Raises:
        FileExistsError: If ``directory_path`` already exists. The original
            OS-level error is attached as ``__cause__``.
    """
    try:
        os.makedirs(directory_path)
    except FileExistsError as err:
        # Chain explicitly so the traceback shows the OS error as the direct cause
        # instead of "another exception occurred during handling".
        raise FileExistsError(f"{directory_path} already exists.") from err


# To remove a directory
def rm_dir(directory_path):
    """Recursively delete ``directory_path`` and everything beneath it.

    Args:
        directory_path: Path of the directory tree to remove.

    Raises:
        FileNotFoundError: If ``directory_path`` does not exist. The original
            OS-level error is attached as ``__cause__``.
    """
    try:
        shutil.rmtree(directory_path)
    except FileNotFoundError as err:
        # Chain explicitly so the traceback shows the OS error as the direct cause
        # instead of "another exception occurred during handling".
        raise FileNotFoundError(f"{directory_path} does not exist.") from err


# Get an artifact (usually from the target directory) such as
# manifest.json or catalog.json to use in a test
def get_artifact(*paths):
Expand Down
95 changes: 95 additions & 0 deletions tests/adapter/dbt/tests/adapter/simple_seed/fixtures.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
#
# Macros
#

# Source of a generic test named `column_type`. It maps each column name of `model`
# to its data_type (via adapter.get_columns_in_relation) and compares the entry for
# `column_name` against the expected `type`. On a mismatch it logs the validation
# message (only when `execute` is true) and the query returns one `validation_error`
# row; on a match the `where` clause filters everything out and no rows are returned.
macros__schema_test = """
{% test column_type(model, column_name, type) %}

{% set cols = adapter.get_columns_in_relation(model) %}

{% set col_types = {} %}
{% for col in cols %}
{% do col_types.update({col.name: col.data_type}) %}
{% endfor %}

{% set validation_message = 'Got a column type of ' ~ col_types.get(column_name) ~ ', expected ' ~ type %}

{% set val = 0 if col_types.get(column_name) == type else 1 %}
{% if val == 1 and execute %}
{{ log(validation_message, info=True) }}
{% endif %}

select '{{ validation_message }}' as validation_error
from (select true) as nothing
where {{ val }} = 1

{% endtest %}

"""

#
# Models
#

# Model SQL that selects every column from the `seed_actual` seed through ref().
models__downstream_from_seed_actual = """
select * from {{ ref('seed_actual') }}

"""
# Model SQL that reads the `seed_expected` relation directly from the model's own
# schema ({{ this.schema }}) instead of going through ref().
models__from_basic_seed = """
select * from {{ this.schema }}.seed_expected

"""

#
# Properties
#

# Properties file for the `seed_enabled` and `seed_tricky` seeds. Every listed column
# carries a `column_type` test stating the data type it is expected to be loaded as.
# The YAML nesting is significant: `columns` belongs to a seed, `tests` to a column,
# and `type` is the argument of `column_type`.
properties__schema_yml = """
version: 2
seeds:
- name: seed_enabled
  columns:
  - name: birthday
    tests:
    - column_type:
        type: date
  - name: seed_id
    tests:
    - column_type:
        type: text

- name: seed_tricky
  columns:
  - name: seed_id
    tests:
    - column_type:
        type: integer
  - name: seed_id_str
    tests:
    - column_type:
        type: text
  - name: a_bool
    tests:
    - column_type:
        type: boolean
  - name: looks_like_a_bool
    tests:
    - column_type:
        type: text
  - name: a_date
    tests:
    - column_type:
        type: timestamp without time zone
  - name: looks_like_a_date
    tests:
    - column_type:
        type: text
  - name: relative
    tests:
    - column_type:
        type: text
  - name: weekday
    tests:
    - column_type:
        type: text
"""

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -1,29 +1,35 @@
import csv
import pytest
import shutil

from codecs import BOM_UTF8
from pathlib import Path

from dbt.tests.util import (
rm_file,
copy_file,
mkdir,
rm_dir,
run_dbt,
read_file,
check_relations_equal,
check_table_does_exist,
check_table_does_not_exist,
)

from tests.functional.simple_seed.fixtures import (
from dbt.tests.adapter.simple_seed.fixtures import (
models__downstream_from_seed_actual,
models__from_basic_seed,
seeds__disabled_in_config,
seeds__enabled_in_config,
seeds__tricky,
seeds__wont_parse,
)

# from `test/integration/test_simple_seed`, test_simple_seed
from dbt.tests.adapter.simple_seed.seeds import (
seed__actual_csv,
seeds__expected_sql,
seeds__enabled_in_config_csv,
seeds__disabled_in_config_csv,
seeds__tricky_csv,
seeds__wont_parse_csv,
seed__unicode_csv,
seed__with_dots_csv,
)


class SeedConfigBase(object):
Expand All @@ -40,12 +46,11 @@ class SeedTestBase(SeedConfigBase):
@pytest.fixture(scope="class", autouse=True)
def setUp(self, project):
"""Create table for ensuring seeds and models used in tests build correctly"""
project.run_sql_file(project.test_data_dir / Path("seed_expected.sql"))
project.run_sql(seeds__expected_sql)

@pytest.fixture(scope="class")
def seeds(self, test_data_dir):
seed_actual_csv = read_file(test_data_dir, "seed_actual.csv")
return {"seed_actual.csv": seed_actual_csv}
return {"seed_actual.csv": seed__actual_csv}

@pytest.fixture(scope="class")
def models(self):
Expand Down Expand Up @@ -110,7 +115,7 @@ def project_config_update(self):
}

def test_simple_seed_full_refresh_config(self, project):
"""Config options should override full-refresh flag because config is higher priority"""
"""Config options should override a full-refresh flag because config is higher priority"""
self._build_relations_for_test(project)
self._check_relation_end_state(run_result=run_dbt(["seed"]), project=project, exists=True)
self._check_relation_end_state(
Expand All @@ -122,7 +127,7 @@ class TestSeedCustomSchema(SeedTestBase):
@pytest.fixture(scope="class", autouse=True)
def setUp(self, project):
"""Create table for ensuring seeds and models used in tests build correctly"""
project.run_sql_file(project.test_data_dir / Path("seed_expected.sql"))
project.run_sql(seeds__expected_sql)

@pytest.fixture(scope="class")
def project_config_update(self):
Expand All @@ -134,8 +139,8 @@ def project_config_update(self):
}

def test_simple_seed_with_schema(self, project):
results = run_dbt(["seed"])
assert len(results) == 1
seed_results = run_dbt(["seed"])
assert len(seed_results) == 1
custom_schema = f"{project.test_schema}_custom_schema"
check_relations_equal(project.adapter, [f"{custom_schema}.seed_actual", "seed_expected"])

Expand All @@ -146,13 +151,14 @@ def test_simple_seed_with_schema(self, project):
check_relations_equal(project.adapter, [f"{custom_schema}.seed_actual", "seed_expected"])

def test_simple_seed_with_drop_and_schema(self, project):
results = run_dbt(["seed"])
assert len(results) == 1
seed_results = run_dbt(["seed"])
assert len(seed_results) == 1
custom_schema = f"{project.test_schema}_custom_schema"
check_relations_equal(project.adapter, [f"{custom_schema}.seed_actual", "seed_expected"])

# this should drop the seed table, then re-create
results = run_dbt(["seed", "--full-refresh"])
assert len(results) == 1
custom_schema = f"{project.test_schema}_custom_schema"
check_relations_equal(project.adapter, [f"{custom_schema}.seed_actual", "seed_expected"])

Expand All @@ -161,9 +167,9 @@ class TestSimpleSeedEnabledViaConfig(object):
@pytest.fixture(scope="session")
def seeds(self):
return {
"seed_enabled.csv": seeds__enabled_in_config,
"seed_disabled.csv": seeds__disabled_in_config,
"seed_tricky.csv": seeds__tricky,
"seed_enabled.csv": seeds__enabled_in_config_csv,
"seed_disabled.csv": seeds__disabled_in_config_csv,
"seed_tricky.csv": seeds__tricky_csv,
}

@pytest.fixture(scope="class")
Expand All @@ -182,21 +188,21 @@ def clear_test_schema(self, project):

def test_simple_seed_with_disabled(self, clear_test_schema, project):
results = run_dbt(["seed"])
len(results) == 2
assert len(results) == 2
check_table_does_exist(project.adapter, "seed_enabled")
check_table_does_not_exist(project.adapter, "seed_disabled")
check_table_does_exist(project.adapter, "seed_tricky")

def test_simple_seed_selection(self, clear_test_schema, project):
results = run_dbt(["seed", "--select", "seed_enabled"])
len(results) == 1
assert len(results) == 1
check_table_does_exist(project.adapter, "seed_enabled")
check_table_does_not_exist(project.adapter, "seed_disabled")
check_table_does_not_exist(project.adapter, "seed_tricky")

def test_simple_seed_exclude(self, clear_test_schema, project):
results = run_dbt(["seed", "--exclude", "seed_enabled"])
len(results) == 1
assert len(results) == 1
check_table_does_not_exist(project.adapter, "seed_enabled")
check_table_does_not_exist(project.adapter, "seed_disabled")
check_table_does_exist(project.adapter, "seed_tricky")
Expand All @@ -206,19 +212,19 @@ class TestSeedParsing(SeedConfigBase):
@pytest.fixture(scope="class", autouse=True)
def setUp(self, project):
"""Create table for ensuring seeds and models used in tests build correctly"""
project.run_sql_file(project.test_data_dir / Path("seed_expected.sql"))
project.run_sql(seeds__expected_sql)

@pytest.fixture(scope="class")
def seeds(self):
return {"seed.csv": seeds__wont_parse}
return {"seed.csv": seeds__wont_parse_csv}

@pytest.fixture(scope="class")
def models(self):
return {"model.sql": models__from_basic_seed}

def test_dbt_run_skips_seeds(self, project):
# run does not try to parse the seed files
len(run_dbt()) == 1
assert len(run_dbt()) == 1

# make sure 'dbt seed' fails, otherwise our test is invalid!
run_dbt(["seed"], expect_pass=False)
Expand All @@ -229,32 +235,32 @@ class TestSimpleSeedWithBOM(SeedConfigBase):
@pytest.fixture(scope="class", autouse=True)
def setUp(self, project):
"""Create table for ensuring seeds and models used in tests build correctly"""
project.run_sql_file(project.test_data_dir / Path("seed_expected.sql"))

# manual copy because seed has a special and tricky-to-include unicode character at 0
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to preserve these as actual .csv files to validate this? I'm wondering if it would be easier to have a core-only test that covers some of these edge cases (BOM, Unicode, etc.) and a genuinely "simple" seed test in the adapter zone that just validates loading CSV files.

Copy link
Contributor Author

@nssalian nssalian Feb 4, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that would be a better approach. I'll modify the code.
Update: rethinking this — the adapter zone could include a CSV that checks for these cases, rather than splitting the tests across the two paths. That way there is no duplication.

shutil.copyfile(
project.test_data_dir / Path("seed_bom.csv"),
project.project_root / Path("seeds") / Path("seed_bom.csv"),
project.run_sql(seeds__expected_sql)
copy_file(
project.test_dir,
"seed_bom.csv",
project.project_root / Path("seeds") / "seed_bom.csv",
"",
)

def test_simple_seed(self, project):
results = run_dbt(["seed"])
assert len(results) == 1

seed_result = run_dbt(["seed"])
assert len(seed_result) == 1
# encoding param must be specified in open, so long as Python reads files with a
# default file encoding for character sets beyond extended ASCII.
with open(
project.project_root / Path("seeds") / Path("seed_bom.csv"), encoding="utf-8"
) as fp:
assert fp.read(1) == BOM_UTF8.decode("utf-8")

check_relations_equal(project.adapter, ["seed_expected", "seed_bom"])


class TestSeedSpecificFormats(SeedConfigBase):
"""Expect all edge cases to build"""

def _make_big_seed(self, test_data_dir):
@staticmethod
def _make_big_seed(test_data_dir):
mkdir(test_data_dir)
big_seed_path = test_data_dir / Path("tmp.csv")
with open(big_seed_path, "w") as f:
writer = csv.writer(f)
Expand All @@ -265,18 +271,16 @@ def _make_big_seed(self, test_data_dir):

@pytest.fixture(scope="class")
def seeds(self, test_data_dir):
seed_unicode = read_file(test_data_dir, "seed_unicode.csv")
dotted_seed = read_file(test_data_dir, "seed.with.dots.csv")
big_seed_path = self._make_big_seed(test_data_dir)
big_seed = read_file(big_seed_path)

yield {
"big_seed.csv": big_seed,
"seed.with.dots.csv": dotted_seed,
"seed_unicode.csv": seed_unicode,
"seed.with.dots.csv": seed__with_dots_csv,
"seed_unicode.csv": seed__unicode_csv,
}
rm_file(big_seed_path)
rm_dir(test_data_dir)

def test_simple_seed(self, project):
results = run_dbt(["seed"])
len(results) == 3
assert len(results) == 3
Loading