Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add base class for merge exclude tests #6700

Merged
merged 4 commits into from
Feb 8, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .changes/unreleased/Fixes-20230123-132814.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Fixes
body: add merge_exclude_columns adapter tests
time: 2023-01-23T13:28:14.808748-06:00
custom:
Author: dave-connors-3
Issue: "6699"
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
import pytest
from dbt.tests.util import run_dbt, check_relations_equal
from collections import namedtuple


models__merge_exclude_columns_sql = """
{{ config(
materialized = 'incremental',
unique_key = 'id',
incremental_strategy='merge',
merge_exclude_columns=['msg']
) }}

{% if not is_incremental() %}
VersusFacit marked this conversation as resolved.
Show resolved Hide resolved

select 1 as id, 'hello' as msg, 'blue' as color
union all
select 2 as id, 'goodbye' as msg, 'red' as color

{% else %}

select 1 as id, 'hey' as msg, 'blue' as color
dave-connors-3 marked this conversation as resolved.
Show resolved Hide resolved
union all
select 2 as id, 'yo' as msg, 'green' as color
union all
select 3 as id, 'anyway' as msg, 'purple' as color

{% endif %}
"""

seeds__expected_merge_exclude_columns_csv = """id,msg,color
1,hello,blue
2,goodbye,green
3,anyway,purple
"""

ResultHolder = namedtuple(
"ResultHolder",
[
"seed_count",
"model_count",
"seed_rows",
"inc_test_model_count",
"opt_model_count",
"relation",
],
)


class BaseMergeExcludeColumns:
@pytest.fixture(scope="class")
def models(self):
return {"merge_exclude_columns.sql": models__merge_exclude_columns_sql}

@pytest.fixture(scope="class")
def seeds(self):
return {"expected_merge_exclude_columns.csv": seeds__expected_merge_exclude_columns_csv}

def update_incremental_model(self, incremental_model):
"""update incremental model after the seed table has been updated"""
model_result_set = run_dbt(["run", "--select", incremental_model])
return len(model_result_set)

def get_test_fields(
self, project, seed, incremental_model, update_sql_file, opt_model_count=None
):

seed_count = len(run_dbt(["seed", "--select", seed, "--full-refresh"]))

model_count = len(run_dbt(["run", "--select", incremental_model, "--full-refresh"]))
# pass on kwarg
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the keyword argument here exactly? I just see renaming a variable name.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

believe this code was copied from elsewhere, the comments may not be perfectly applicable

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

specifically, took Matt's work from in test_incremental_unique_id

relation = incremental_model
# update seed in anticipation of incremental model update
row_count_query = "select * from {}.{}".format(project.test_schema, seed)
# project.run_sql_file(Path("seeds") / Path(update_sql_file + ".sql"))
dave-connors-3 marked this conversation as resolved.
Show resolved Hide resolved
seed_rows = len(project.run_sql(row_count_query, fetch="all"))

# propagate seed state to incremental model according to unique keys
inc_test_model_count = self.update_incremental_model(incremental_model=incremental_model)

return ResultHolder(
seed_count, model_count, seed_rows, inc_test_model_count, opt_model_count, relation
)

def check_scenario_correctness(self, expected_fields, test_case_fields, project):
VersusFacit marked this conversation as resolved.
Show resolved Hide resolved
"""Invoke assertions to verify correct build functionality"""
# 1. test seed(s) should build afresh
assert expected_fields.seed_count == test_case_fields.seed_count
# 2. test model(s) should build afresh
assert expected_fields.model_count == test_case_fields.model_count
# 3. seeds should have intended row counts post update
assert expected_fields.seed_rows == test_case_fields.seed_rows
# 4. incremental test model(s) should be updated
assert expected_fields.inc_test_model_count == test_case_fields.inc_test_model_count
# 5. extra incremental model(s) should be built; optional since
# comparison may be between an incremental model and seed
if expected_fields.opt_model_count and test_case_fields.opt_model_count:
assert expected_fields.opt_model_count == test_case_fields.opt_model_count
# 6. result table should match intended result set (itself a relation)
check_relations_equal(
project.adapter, [expected_fields.relation, test_case_fields.relation]
)

def get_expected_fields(self, relation, seed_rows, opt_model_count=None):
Copy link
Contributor

@VersusFacit VersusFacit Feb 7, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: I'd probably make this a class object or a constant and not use a method to obscure it. All that this really does it hide the instantiation, but we don't expect these values to change. I don't see an advantage to this factory-function.

return ResultHolder(
seed_count=1,
model_count=1,
inc_test_model_count=1,
seed_rows=seed_rows,
opt_model_count=opt_model_count,
relation=relation,
)

# no unique_key test
def test__merge_exclude_columns(self, project):
"""seed should match model after two incremental runs"""

expected_fields = self.get_expected_fields(
relation="expected_merge_exclude_columns", seed_rows=3
)
test_case_fields = self.get_test_fields(
project,
seed="expected_merge_exclude_columns",
incremental_model="merge_exclude_columns",
update_sql_file=None,
)
self.check_scenario_correctness(expected_fields, test_case_fields, project)