Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unit test support for state:modified and --defer #9032

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .changes/unreleased/Features-20231107-231006.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Features
body: Unit tests support --defer and state:modified
time: 2023-11-07T23:10:06.376588-05:00
custom:
Author: jtcohen6
Issue: "8517"
18 changes: 18 additions & 0 deletions core/dbt/contracts/graph/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1080,6 +1080,7 @@
overrides: Optional[UnitTestOverrides] = None
depends_on: DependsOn = field(default_factory=DependsOn)
config: UnitTestConfig = field(default_factory=UnitTestConfig)
checksum: Optional[str] = None

@property
def depends_on_nodes(self):
Expand All @@ -1090,6 +1091,23 @@
tags = self.config.tags
return [tags] if isinstance(tags, str) else tags

def build_unit_test_checksum(self, project_root: str, fixture_paths: List[str]):
# everything except 'description'
data = f"{self.model}-{self.given}-{self.expect}-{self.overrides}"

# include underlying fixture data
for input in self.given:
if input.fixture:
data += f"-{input.get_rows(project_root, fixture_paths)}"

self.checksum = hashlib.new("sha256", data.encode("utf-8")).hexdigest()

def same_contents(self, other: Optional["UnitTestDefinition"]) -> bool:
if other is None:
return False

Check warning on line 1107 in core/dbt/contracts/graph/nodes.py

View check run for this annotation

Codecov / codecov/patch

core/dbt/contracts/graph/nodes.py#L1107

Added line #L1107 was not covered by tests

return self.checksum == other.checksum


# ====================================
# Snapshot node
Expand Down
12 changes: 10 additions & 2 deletions core/dbt/graph/selector_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,9 @@ def is_selected_node(fqn: List[str], node_selector: str, is_versioned: bool) ->
return True


SelectorTarget = Union[SourceDefinition, ManifestNode, Exposure, Metric]
SelectorTarget = Union[
SourceDefinition, ManifestNode, Exposure, Metric, SemanticModel, UnitTestDefinition
]


class SelectorMethod(metaclass=abc.ABCMeta):
Expand Down Expand Up @@ -639,7 +641,9 @@ def check_macros_modified(self, node):
def check_modified_content(
self, old: Optional[SelectorTarget], new: SelectorTarget, adapter_type: str
) -> bool:
if isinstance(new, (SourceDefinition, Exposure, Metric, SemanticModel)):
if isinstance(
new, (SourceDefinition, Exposure, Metric, SemanticModel, UnitTestDefinition)
):
# these all overwrite `same_contents`
different_contents = not new.same_contents(old) # type: ignore
else:
Expand Down Expand Up @@ -730,6 +734,10 @@ def search(self, included_nodes: Set[UniqueId], selector: str) -> Iterator[Uniqu
previous_node = manifest.exposures[node]
elif node in manifest.metrics:
previous_node = manifest.metrics[node]
elif node in manifest.semantic_models:
previous_node = manifest.semantic_models[node]
elif node in manifest.unit_tests:
previous_node = manifest.unit_tests[node]

keyword_args = {}
if checker.__name__ in [
Expand Down
4 changes: 4 additions & 0 deletions core/dbt/parser/unit_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,10 @@ def parse(self) -> ParseResult:
fqn=unit_test_fqn,
config=unit_test_config,
)
# for calculating state:modified
unit_test_definition.build_unit_test_checksum(
self.schema_parser.project.project_root, self.schema_parser.project.fixture_paths
)
self.manifest.add_unit_test(self.yaml.file, unit_test_definition)

return ParseResult()
Expand Down
15 changes: 14 additions & 1 deletion core/dbt/task/unit_test.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
from dataclasses import dataclass
from dbt.dataclass_schema import dbtClassMixin
import threading
from typing import Dict, Any, Optional
from typing import Dict, Any, Optional, AbstractSet
import io

from .compile import CompileRunner
from .run import RunTask

from dbt.adapters.factory import get_adapter
from dbt.contracts.graph.nodes import UnitTestNode
from dbt.contracts.graph.manifest import Manifest
from dbt.contracts.results import TestStatus, RunResult
Expand Down Expand Up @@ -206,13 +207,25 @@ def build_unit_test_manifest(self):
return loader.load()

def reset_job_queue_and_manifest(self):
# We want deferral to happen here (earlier than normal) before we turn
# the normal manifest into the unit testing manifest
adapter = get_adapter(self.config)
with adapter.connection_named("master"):
self.populate_adapter_cache(adapter)
self.defer_to_manifest(adapter, self.job_queue._selected)

# We have the selected models from the "regular" manifest, now we switch
# to using the unit_test_manifest to run the unit tests.
self.using_unit_test_manifest = True
self.manifest = self.build_unit_test_manifest()
self.compile_manifest() # create the networkx graph
self.job_queue = self.get_graph_queue()

def before_run(self, adapter, selected_uids: AbstractSet[str]) -> None:
# We already did cache population + deferral earlier (in reset_job_queue_and_manifest)
# and we don't need to create any schemas
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After moving the calling of unit tests into the test task, this is going to be harder to do differently like this since running unit tests will hook into the standard runnable flow.

Copy link
Contributor

@MichelleArk MichelleArk Nov 14, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah, exactly... 🤔 happy to brainstorm alternative approaches in the test task work

pass

def get_node_selector(self) -> ResourceTypeSelector:
if self.manifest is None or self.graph is None:
raise DbtInternalError("manifest and graph must be set to get perform node selection")
Expand Down
86 changes: 74 additions & 12 deletions tests/functional/unit_testing/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,73 @@
tags: test_this
"""


test_my_model_simple_fixture_yml = """
unit_tests:
- name: test_my_model
model: my_model
given:
- input: ref('my_model_a')
rows:
- {id: 1, a: 1}
- input: ref('my_model_b')
rows:
- {id: 1, b: 2}
- {id: 2, b: 2}
expect:
rows:
- {c: 2}

- name: test_depends_on_fixture
model: my_model
given:
- input: ref('my_model_a')
rows: []
- input: ref('my_model_b')
format: csv
fixture: test_my_model_fixture
expect:
rows: []

- name: test_my_model_overrides
model: my_model
given:
- input: ref('my_model_a')
rows:
- {id: 1, a: 1}
- input: ref('my_model_b')
rows:
- {id: 1, b: 2}
- {id: 2, b: 2}
overrides:
macros:
type_numeric: override
invocation_id: 123
vars:
my_test: var_override
env_vars:
MY_TEST: env_var_override
expect:
rows:
- {macro_call: override, var_call: var_override, env_var_call: env_var_override, invocation_id: 123}

- name: test_has_string_c_ab
model: my_model
given:
- input: ref('my_model_a')
rows:
- {id: 1, string_a: a}
- input: ref('my_model_b')
rows:
- {id: 1, string_b: b}
expect:
rows:
- {string_c: ab}
config:
tags: test_this
"""


datetime_test = """
- name: test_my_model_datetime
model: my_model
Expand Down Expand Up @@ -381,36 +448,31 @@
tags: test_this
"""

test_my_model_fixture_csv = """
id,b
test_my_model_fixture_csv = """id,b
1,2
2,2
"""

test_my_model_a_fixture_csv = """
id,string_a
test_my_model_a_fixture_csv = """id,string_a
1,a
"""

test_my_model_a_empty_fixture_csv = """
"""

test_my_model_a_numeric_fixture_csv = """
id,a
test_my_model_a_numeric_fixture_csv = """id,a
1,1
"""

test_my_model_b_fixture_csv = """
id,string_b
test_my_model_b_fixture_csv = """id,string_b
1,b
"""
test_my_model_basic_fixture_csv = """
c

test_my_model_basic_fixture_csv = """c
2
"""

test_my_model_concat_fixture_csv = """
string_c
test_my_model_concat_fixture_csv = """string_c
ab
"""

Expand Down
135 changes: 135 additions & 0 deletions tests/functional/unit_testing/test_state.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
import os
import pytest
import shutil
from copy import deepcopy

from dbt.tests.util import (
run_dbt,
write_file,
write_config_file,
)
from fixtures import (
my_model_vars_sql,
my_model_a_sql,
my_model_b_sql,
test_my_model_simple_fixture_yml,
test_my_model_fixture_csv,
test_my_model_b_fixture_csv as test_my_model_fixture_csv_modified,
)


class UnitTestState:
@pytest.fixture(scope="class")
def models(self):
return {
"my_model.sql": my_model_vars_sql,
"my_model_a.sql": my_model_a_sql,
"my_model_b.sql": my_model_b_sql,
"test_my_model.yml": test_my_model_simple_fixture_yml,
}

@pytest.fixture(scope="class")
def tests(self):
return {
"fixtures": {
"test_my_model_fixture.csv": test_my_model_fixture_csv,
}
}

@pytest.fixture(scope="class")
def project_config_update(self):
return {"vars": {"my_test": "my_test_var"}}

def copy_state(self, project_root):
state_path = os.path.join(project_root, "state")
if not os.path.exists(state_path):
os.makedirs(state_path)
shutil.copyfile(
f"{project_root}/target/manifest.json", f"{project_root}/state/manifest.json"
)
shutil.copyfile(
f"{project_root}/target/run_results.json", f"{project_root}/state/run_results.json"
)


class TestUnitTestStateModified(UnitTestState):
def test_state_modified(self, project):
run_dbt(["run"])
run_dbt(["unit-test"], expect_pass=False)
self.copy_state(project.project_root)

# no changes
results = run_dbt(["unit-test", "--select", "state:modified", "--state", "state"])
assert len(results) == 0

# change underlying fixture file
write_file(
test_my_model_fixture_csv_modified,
project.project_root,
"tests",
"fixtures",
"test_my_model_fixture.csv",
)
# TODO: remove --no-partial-parse as part of https://github.com/dbt-labs/dbt-core/issues/9067
results = run_dbt(
["--no-partial-parse", "unit-test", "--select", "state:modified", "--state", "state"],
expect_pass=True,
)
assert len(results) == 1
assert results[0].node.name.endswith("test_depends_on_fixture")
# reset changes
self.copy_state(project.project_root)

# change unit test definition of a single unit test
with_changes = test_my_model_simple_fixture_yml.replace("{string_c: ab}", "{string_c: bc}")
write_config_file(with_changes, project.project_root, "models", "test_my_model.yml")
results = run_dbt(
["unit-test", "--select", "state:modified", "--state", "state"], expect_pass=False
)
assert len(results) == 1
assert results[0].node.name.endswith("test_has_string_c_ab")

# change underlying model logic
write_config_file(
test_my_model_simple_fixture_yml, project.project_root, "models", "test_my_model.yml"
)
write_file(
my_model_vars_sql.replace("a+b as c,", "a + b as c,"),
project.project_root,
"models",
"my_model.sql",
)
results = run_dbt(
["unit-test", "--select", "state:modified", "--state", "state"], expect_pass=False
)
assert len(results) == 4


class TestUnitTestRetry(UnitTestState):
def test_unit_test_retry(self, project):
run_dbt(["run"])
run_dbt(["unit-test"], expect_pass=False)
self.copy_state(project.project_root)

results = run_dbt(["retry"], expect_pass=False)
assert len(results) == 1


class TestUnitTestDeferState(UnitTestState):
@pytest.fixture(scope="class")
def other_schema(self, unique_schema):
return unique_schema + "_other"

@pytest.fixture(scope="class")
def profiles_config_update(self, dbt_profile_target, unique_schema, other_schema):
outputs = {"default": dbt_profile_target, "otherschema": deepcopy(dbt_profile_target)}
outputs["default"]["schema"] = unique_schema
outputs["otherschema"]["schema"] = other_schema
return {"test": {"outputs": outputs, "target": "default"}}

def test_unit_test_defer_state(self, project):
run_dbt(["run", "--target", "otherschema"])
self.copy_state(project.project_root)
results = run_dbt(["unit-test", "--defer", "--state", "state"], expect_pass=False)
assert len(results) == 4
assert sorted([r.status for r in results]) == ["fail", "pass", "pass", "pass"]
7 changes: 6 additions & 1 deletion tests/functional/unit_testing/test_unit_testing.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
import pytest
from dbt.tests.util import run_dbt, write_file, get_manifest, get_artifact
from dbt.tests.util import (
run_dbt,
write_file,
get_manifest,
get_artifact,
)
from dbt.exceptions import DuplicateResourceNameError
from fixtures import (
my_model_vars_sql,
Expand Down
1 change: 1 addition & 0 deletions tests/unit/test_unit_test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ def test_basic(self):
fqn=["snowplow", "my_model", "test_my_model"],
config=UnitTestConfig(),
)
expected.build_unit_test_checksum("anything", "anything")
assertEqualNodes(unit_test, expected)

def test_unit_test_config(self):
Expand Down