From 3432436daec18142c29e9df3d86cfbc6bde144b1 Mon Sep 17 00:00:00 2001 From: Kshitij Aranke Date: Thu, 16 Nov 2023 16:24:55 +0000 Subject: [PATCH] Fix #8652: Use seed file from disk for unit testing if rows not specified in YAML config (#9064) Co-authored-by: Michelle Ark Fix #8652: Use seed value if rows not specified --- .../unreleased/Fixes-20231113-154535.yaml | 6 + core/dbt/parser/unit_tests.py | 38 ++++- dev-requirements.txt | 2 +- .../unit_testing/test_unit_testing.py | 134 +++++++++++++++++- 4 files changed, 176 insertions(+), 4 deletions(-) create mode 100644 .changes/unreleased/Fixes-20231113-154535.yaml diff --git a/.changes/unreleased/Fixes-20231113-154535.yaml b/.changes/unreleased/Fixes-20231113-154535.yaml new file mode 100644 index 00000000000..f352830921f --- /dev/null +++ b/.changes/unreleased/Fixes-20231113-154535.yaml @@ -0,0 +1,6 @@ +kind: Fixes +body: Use seed file from disk for unit testing if rows not specified in YAML config +time: 2023-11-13T15:45:35.008565Z +custom: + Author: aranke + Issue: "8652" diff --git a/core/dbt/parser/unit_tests.py b/core/dbt/parser/unit_tests.py index d5dfde78247..c0182ef017c 100644 --- a/core/dbt/parser/unit_tests.py +++ b/core/dbt/parser/unit_tests.py @@ -1,5 +1,9 @@ +from csv import DictReader +from pathlib import Path from typing import List, Set, Dict, Any +from dbt_extractor import py_extract_from_source, ExtractionError # type: ignore + from dbt.config import RuntimeConfig from dbt.context.context_config import ContextConfig from dbt.context.providers import generate_parse_exposure, get_rendered @@ -28,7 +32,6 @@ ParseResult, ) from dbt.utils import get_pseudo_test_path -from dbt_extractor import py_extract_from_source, ExtractionError # type: ignore class UnitTestManifestLoader: @@ -130,7 +133,7 @@ def parse_unit_test_case(self, test_case: UnitTestDefinition): ), } - if original_input_node.resource_type == NodeType.Model: + if original_input_node.resource_type in (NodeType.Model, NodeType.Seed): input_name = f"{unit_test_node.name}__{original_input_node.name}" input_node = ModelNode( **common_fields, @@ -219,6 +222,35 @@ def __init__(self, schema_parser: SchemaParser, yaml: YamlBlock) -> None: self.schema_parser = schema_parser self.yaml = yaml + def _load_rows_from_seed(self, ref_str: str) -> List[Dict[str, Any]]: + """Read rows from seed file on disk if not specified in YAML config. If seed file doesn't exist, return empty list.""" + ref = py_extract_from_source("{{ " + ref_str + " }}")["refs"][0] + + rows: List[Dict[str, Any]] = [] + + seed_name = ref["name"] + package_name = ref.get("package", self.project.project_name) + + seed_node = self.manifest.ref_lookup.find(seed_name, package_name, None, self.manifest) + + if not seed_node or seed_node.resource_type != NodeType.Seed: + # Seed not found in custom package specified + if package_name != self.project.project_name: + raise ParsingError( + f"Unable to find seed '{package_name}.{seed_name}' for unit tests in '{package_name}' package" + ) + else: + raise ParsingError( + f"Unable to find seed '{package_name}.{seed_name}' for unit tests in directories: {self.project.seed_paths}" + ) + + seed_path = Path(seed_node.root_path) / seed_node.original_file_path + with open(seed_path, "r") as f: + for row in DictReader(f): + rows.append(row) + + return rows + def parse(self) -> ParseResult: for data in self.get_key_dicts(): unit_test = self._get_unit_test(data) @@ -232,6 +264,8 @@ def parse(self) -> ParseResult: # Check that format and type of rows matches for each given input for input in unit_test.given: + if input.rows is None and input.fixture is None: + input.rows = self._load_rows_from_seed(input.input) input.validate_fixture("input", unit_test.name) unit_test.expect.validate_fixture("expected", unit_test.name) diff --git a/dev-requirements.txt b/dev-requirements.txt index 3f0aba49444..6270928d95d 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,6 +1,6 @@ black==23.3.0 bumpversion -ddtrace +ddtrace==2.1.7 docutils flake8 flaky diff --git a/tests/functional/unit_testing/test_unit_testing.py b/tests/functional/unit_testing/test_unit_testing.py index 03074c3b543..815881da12b 100644 --- a/tests/functional/unit_testing/test_unit_testing.py +++ b/tests/functional/unit_testing/test_unit_testing.py @@ -5,7 +5,7 @@ get_manifest, get_artifact, ) -from dbt.exceptions import DuplicateResourceNameError +from dbt.exceptions import DuplicateResourceNameError, ParsingError from fixtures import ( my_model_vars_sql, my_model_a_sql, @@ -105,3 +105,135 @@ def test_basic(self, project): # Select by model name results = run_dbt(["unit-test", "--select", "my_incremental_model"], expect_pass=True) assert len(results) == 2 + + +my_new_model = """ +select +my_favorite_seed.id, +a + b as c +from {{ ref('my_favorite_seed') }} as my_favorite_seed +inner join {{ ref('my_favorite_model') }} as my_favorite_model +on my_favorite_seed.id = my_favorite_model.id +""" + +my_favorite_model = """ +select +2 as id, +3 as b +""" + +seed_my_favorite_seed = """id,a +1,5 +2,4 +3,3 +4,2 +5,1 +""" + +schema_yml_explicit_seed = """ +unit_tests: + - name: t + model: my_new_model + given: + - input: ref('my_favorite_seed') + rows: + - {id: 1, a: 10} + - input: ref('my_favorite_model') + rows: + - {id: 1, b: 2} + expect: + rows: + - {id: 1, c: 12} +""" + +schema_yml_implicit_seed = """ +unit_tests: + - name: t + model: my_new_model + given: + - input: ref('my_favorite_seed') + - input: ref('my_favorite_model') + rows: + - {id: 1, b: 2} + expect: + rows: + - {id: 1, c: 7} +""" + +schema_yml_nonexistent_seed = """ +unit_tests: + - name: t + model: my_new_model + given: + - input: ref('my_second_favorite_seed') + - input: ref('my_favorite_model') + rows: + - {id: 1, b: 2} + expect: + rows: + - {id: 1, c: 7} +""" + + +class TestUnitTestExplicitSeed: + @pytest.fixture(scope="class") + def seeds(self): + return {"my_favorite_seed.csv": seed_my_favorite_seed} + + @pytest.fixture(scope="class") + def models(self): + return { + "my_new_model.sql": my_new_model, + "my_favorite_model.sql": my_favorite_model, + "schema.yml": schema_yml_explicit_seed, + } + + def test_explicit_seed(self, project): + run_dbt(["seed"]) + run_dbt(["run"]) + + # Select by model name + results = run_dbt(["unit-test", "--select", "my_new_model"], expect_pass=True) + assert len(results) == 1 + + +class TestUnitTestImplicitSeed: + @pytest.fixture(scope="class") + def seeds(self): + return {"my_favorite_seed.csv": seed_my_favorite_seed} + + @pytest.fixture(scope="class") + def models(self): + return { + "my_new_model.sql": my_new_model, + "my_favorite_model.sql": my_favorite_model, + "schema.yml": schema_yml_implicit_seed, + } + + def test_implicit_seed(self, project): + run_dbt(["seed"]) + run_dbt(["run"]) + + # Select by model name + results = run_dbt(["unit-test", "--select", "my_new_model"], expect_pass=True) + assert len(results) == 1 + + +class TestUnitTestNonexistentSeed: + @pytest.fixture(scope="class") + def seeds(self): + return {"my_favorite_seed.csv": seed_my_favorite_seed} + + @pytest.fixture(scope="class") + def models(self): + return { + "my_new_model.sql": my_new_model, + "my_favorite_model.sql": my_favorite_model, + "schema.yml": schema_yml_nonexistent_seed, + } + + def test_nonexistent_seed(self, project): + with pytest.raises( + ParsingError, match="Unable to find seed 'test.my_second_favorite_seed' for unit tests" + ): + run_dbt(["unit-test", "--select", "my_new_model"], expect_pass=False)