From 3432436daec18142c29e9df3d86cfbc6bde144b1 Mon Sep 17 00:00:00 2001
From: Kshitij Aranke <kshitij.aranke@dbtlabs.com>
Date: Thu, 16 Nov 2023 16:24:55 +0000
Subject: [PATCH] Fix #8652: Use seed file from disk for unit testing if rows
 not specified in YAML config (#9064)

Co-authored-by: Michelle Ark <MichelleArk@users.noreply.github.com>
Fix #8652: Use seed value if rows not specified
---
 .../unreleased/Fixes-20231113-154535.yaml     |   6 +
 core/dbt/parser/unit_tests.py                 |  38 ++++-
 dev-requirements.txt                          |   2 +-
 .../unit_testing/test_unit_testing.py         | 134 +++++++++++++++++-
 4 files changed, 176 insertions(+), 4 deletions(-)
 create mode 100644 .changes/unreleased/Fixes-20231113-154535.yaml

diff --git a/.changes/unreleased/Fixes-20231113-154535.yaml b/.changes/unreleased/Fixes-20231113-154535.yaml
new file mode 100644
index 00000000000..f352830921f
--- /dev/null
+++ b/.changes/unreleased/Fixes-20231113-154535.yaml
@@ -0,0 +1,6 @@
+kind: Fixes
+body: Use seed file from disk for unit testing if rows not specified in YAML config
+time: 2023-11-13T15:45:35.008565Z
+custom:
+  Author: aranke
+  Issue: "8652"
diff --git a/core/dbt/parser/unit_tests.py b/core/dbt/parser/unit_tests.py
index d5dfde78247..c0182ef017c 100644
--- a/core/dbt/parser/unit_tests.py
+++ b/core/dbt/parser/unit_tests.py
@@ -1,5 +1,9 @@
+from csv import DictReader
+from pathlib import Path
 from typing import List, Set, Dict, Any
 
+from dbt_extractor import py_extract_from_source, ExtractionError  # type: ignore
+
 from dbt.config import RuntimeConfig
 from dbt.context.context_config import ContextConfig
 from dbt.context.providers import generate_parse_exposure, get_rendered
@@ -28,7 +32,6 @@
     ParseResult,
 )
 from dbt.utils import get_pseudo_test_path
-from dbt_extractor import py_extract_from_source, ExtractionError  # type: ignore
 
 
 class UnitTestManifestLoader:
@@ -130,7 +133,7 @@ def parse_unit_test_case(self, test_case: UnitTestDefinition):
                 ),
             }
 
-            if original_input_node.resource_type == NodeType.Model:
+            if original_input_node.resource_type in (NodeType.Model, NodeType.Seed):
                 input_name = f"{unit_test_node.name}__{original_input_node.name}"
                 input_node = ModelNode(
                     **common_fields,
@@ -219,6 +222,35 @@ def __init__(self, schema_parser: SchemaParser, yaml: YamlBlock) -> None:
         self.schema_parser = schema_parser
         self.yaml = yaml
 
+    def _load_rows_from_seed(self, ref_str: str) -> List[Dict[str, Any]]:
+        """Read rows from seed file on disk if not specified in YAML config. Raise ParsingError if the seed cannot be found."""
+        ref = py_extract_from_source("{{ " + ref_str + " }}")["refs"][0]
+
+        rows: List[Dict[str, Any]] = []
+
+        seed_name = ref["name"]
+        package_name = ref.get("package", self.project.project_name)
+
+        seed_node = self.manifest.ref_lookup.find(seed_name, package_name, None, self.manifest)
+
+        if not seed_node or seed_node.resource_type != NodeType.Seed:
+            # Seed not found in custom package specified
+            if package_name != self.project.project_name:
+                raise ParsingError(
+                    f"Unable to find seed '{package_name}.{seed_name}' for unit tests in '{package_name}' package"
+                )
+            else:
+                raise ParsingError(
+                    f"Unable to find seed '{package_name}.{seed_name}' for unit tests in directories: {self.project.seed_paths}"
+                )
+
+        seed_path = Path(seed_node.root_path) / seed_node.original_file_path
+        with open(seed_path, "r") as f:
+            for row in DictReader(f):
+                rows.append(row)
+
+        return rows
+
     def parse(self) -> ParseResult:
         for data in self.get_key_dicts():
             unit_test = self._get_unit_test(data)
@@ -232,6 +264,8 @@ def parse(self) -> ParseResult:
 
             # Check that format and type of rows matches for each given input
             for input in unit_test.given:
+                if input.rows is None and input.fixture is None:
+                    input.rows = self._load_rows_from_seed(input.input)
                 input.validate_fixture("input", unit_test.name)
             unit_test.expect.validate_fixture("expected", unit_test.name)
 
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 3f0aba49444..6270928d95d 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -1,6 +1,6 @@
 black==23.3.0
 bumpversion
-ddtrace
+ddtrace==2.1.7
 docutils
 flake8
 flaky
diff --git a/tests/functional/unit_testing/test_unit_testing.py b/tests/functional/unit_testing/test_unit_testing.py
index 03074c3b543..815881da12b 100644
--- a/tests/functional/unit_testing/test_unit_testing.py
+++ b/tests/functional/unit_testing/test_unit_testing.py
@@ -5,7 +5,7 @@
     get_manifest,
     get_artifact,
 )
-from dbt.exceptions import DuplicateResourceNameError
+from dbt.exceptions import DuplicateResourceNameError, ParsingError
 from fixtures import (
     my_model_vars_sql,
     my_model_a_sql,
@@ -105,3 +105,135 @@ def test_basic(self, project):
         # Select by model name
         results = run_dbt(["unit-test", "--select", "my_incremental_model"], expect_pass=True)
         assert len(results) == 2
+
+
+my_new_model = """
+select
+my_favorite_seed.id,
+a + b as c
+from {{ ref('my_favorite_seed') }} as my_favorite_seed
+inner join {{ ref('my_favorite_model') }} as my_favorite_model
+on my_favorite_seed.id = my_favorite_model.id
+"""
+
+my_favorite_model = """
+select
+2 as id,
+3 as b
+"""
+
+seed_my_favorite_seed = """id,a
+1,5
+2,4
+3,3
+4,2
+5,1
+"""
+
+schema_yml_explicit_seed = """
+unit_tests:
+  - name: t
+    model: my_new_model
+    given:
+      - input: ref('my_favorite_seed')
+        rows:
+          - {id: 1, a: 10}
+      - input: ref('my_favorite_model')
+        rows:
+          - {id: 1, b: 2}
+    expect:
+      rows:
+        - {id: 1, c: 12}
+"""
+
+schema_yml_implicit_seed = """
+unit_tests:
+  - name: t
+    model: my_new_model
+    given:
+      - input: ref('my_favorite_seed')
+      - input: ref('my_favorite_model')
+        rows:
+          - {id: 1, b: 2}
+    expect:
+      rows:
+        - {id: 1, c: 7}
+"""
+
+schema_yml_nonexistent_seed = """
+unit_tests:
+  - name: t
+    model: my_new_model
+    given:
+      - input: ref('my_second_favorite_seed')
+      - input: ref('my_favorite_model')
+        rows:
+          - {id: 1, b: 2}
+    expect:
+      rows:
+        - {id: 1, c: 7}
+"""
+
+
+class TestUnitTestExplicitSeed:
+    @pytest.fixture(scope="class")
+    def seeds(self):
+        return {"my_favorite_seed.csv": seed_my_favorite_seed}
+
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "my_new_model.sql": my_new_model,
+            "my_favorite_model.sql": my_favorite_model,
+            "schema.yml": schema_yml_explicit_seed,
+        }
+
+    def test_explicit_seed(self, project):
+        run_dbt(["seed"])
+        run_dbt(["run"])
+
+        # Select by model name
+        results = run_dbt(["unit-test", "--select", "my_new_model"], expect_pass=True)
+        assert len(results) == 1
+
+
+class TestUnitTestImplicitSeed:
+    @pytest.fixture(scope="class")
+    def seeds(self):
+        return {"my_favorite_seed.csv": seed_my_favorite_seed}
+
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "my_new_model.sql": my_new_model,
+            "my_favorite_model.sql": my_favorite_model,
+            "schema.yml": schema_yml_implicit_seed,
+        }
+
+    def test_implicit_seed(self, project):
+        run_dbt(["seed"])
+        run_dbt(["run"])
+
+        # Select by model name
+        results = run_dbt(["unit-test", "--select", "my_new_model"], expect_pass=True)
+        assert len(results) == 1
+
+
+class TestUnitTestNonexistentSeed:
+    @pytest.fixture(scope="class")
+    def seeds(self):
+        return {"my_favorite_seed.csv": seed_my_favorite_seed}
+
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "my_new_model.sql": my_new_model,
+            "my_favorite_model.sql": my_favorite_model,
+            "schema.yml": schema_yml_nonexistent_seed,
+        }
+
+    def test_nonexistent_seed(self, project):
+        with pytest.raises(
+            ParsingError, match="Unable to find seed 'test.my_second_favorite_seed' for unit tests"
+        ):
+            run_dbt(["unit-test", "--select", "my_new_model"], expect_pass=False)