From c1bfaa848eb00ba5822b6f79f6fd8db6536427d2 Mon Sep 17 00:00:00 2001
From: Quigley Malcolm <quigley.malcolm@dbtlabs.com>
Date: Tue, 12 Dec 2023 11:18:50 -0800
Subject: [PATCH 1/6] Add unit test that shows unit tests work with external
 nodes

---
 tests/functional/unit_testing/fixtures.py     | 30 +++++++++
 .../unit_testing/test_unit_testing.py         | 62 +++++++++++++++++++
 2 files changed, 92 insertions(+)

diff --git a/tests/functional/unit_testing/fixtures.py b/tests/functional/unit_testing/fixtures.py
index 5c439b92cef..127f18e1d82 100644
--- a/tests/functional/unit_testing/fixtures.py
+++ b/tests/functional/unit_testing/fixtures.py
@@ -649,3 +649,33 @@
       rows:
         - {c: 3}
 """
+
+top_level_domains_sql = """
+SELECT 'example.com' AS tld
+UNION ALL
+SELECT 'gmail.com' AS tld
+"""
+
+test_my_model_external_nodes_sql = """
+WITH
+accounts AS (
+  SELECT user_id, email, email_top_level_domain
+  FROM {{ ref('external_package', 'external_model')}}
+),
+top_level_domains AS (
+  SELECT tld FROM {{ ref('top_level_domains')}}
+),
+joined AS (
+  SELECT
+    accounts.user_id as user_id,
+    top_level_domains.tld as tld
+  FROM accounts
+  LEFT OUTER JOIN top_level_domains
+    ON   accounts.email_top_level_domain = top_level_domains.tld
+)
+
+SELECT
+  joined.user_id as user_id,
+  CASE WHEN joined.tld IS NULL THEN FALSE ELSE TRUE END AS is_valid_email_address
+from joined
+"""
diff --git a/tests/functional/unit_testing/test_unit_testing.py b/tests/functional/unit_testing/test_unit_testing.py
index 2c683d99598..89d54ce3e6b 100644
--- a/tests/functional/unit_testing/test_unit_testing.py
+++ b/tests/functional/unit_testing/test_unit_testing.py
@@ -1,4 +1,5 @@
 import pytest
+from unittest import mock
 from dbt.tests.util import (
     run_dbt,
     write_file,
@@ -6,6 +7,7 @@
 )
 from dbt.contracts.results import NodeStatus
 from dbt.exceptions import DuplicateResourceNameError, ParsingError
+from dbt.plugins.manifest import PluginNodes, ModelNodeArgs
 from fixtures import (
     my_model_sql,
     my_model_vars_sql,
@@ -17,6 +19,8 @@
     event_sql,
     test_my_model_incremental_yml,
     test_my_model_yml_invalid,
+    test_my_model_external_nodes_sql,
+    top_level_domains_sql,
 )
 
 
@@ -256,3 +260,61 @@ def test_invalid_input_configuration(self, project):
         assert len(results) == 3
 
         run_dbt(["test"], expect_pass=False)
+
+
+test_unit_test_with_external_nodes_yml = """
+unit_tests:
+  - name: test_unit_test_with_external_nodes
+    model: test_my_model_external_nodes
+    given:
+      - input: ref('external_package', 'external_model')
+        rows:
+          - {user_id: 1, email: cool@example.com,     email_top_level_domain: example.com}
+          - {user_id: 2, email: cool@unknown.com,     email_top_level_domain: unknown.com}
+          - {user_id: 3, email: badgmail.com,         email_top_level_domain: gmail.com}
+          - {user_id: 4, email: missingdot@gmailcom,  email_top_level_domain: gmail.com}
+      - input: ref('top_level_domains')
+        rows:
+          - {tld: example.com}
+          - {tld: gmail.com}
+    expect:
+      rows:
+        - {user_id: 1, is_valid_email_address: true}
+        - {user_id: 2, is_valid_email_address: false}
+        - {user_id: 3, is_valid_email_address: true}
+        - {user_id: 4, is_valid_email_address: true}
+"""
+
+
+class TestUnitTestExternalNode:
+    @pytest.fixture(scope="class")
+    def external_model_node(self):
+        return ModelNodeArgs(
+            name="external_model",
+            package_name="external_package",
+            identifier="test_identifier",
+            schema="test_schema",
+        )
+
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "top_level_domains.sql": top_level_domains_sql,
+            "test_my_model_external_nodes.sql": test_my_model_external_nodes_sql,
+            "test_unit_test_with_external_nodes.yml": test_unit_test_with_external_nodes_yml,
+        }
+
+    @mock.patch("dbt.plugins.get_plugin_manager")
+    def test_unit_test_external_nodes(
+        self,
+        get_plugin_manager,
+        project,
+        external_model_node,
+    ):
+        # initial plugin - one external model
+        external_nodes = PluginNodes()
+        external_nodes.add_model(external_model_node)
+        get_plugin_manager.return_value.get_nodes.return_value = external_nodes
+
+        results = run_dbt(["test", "--select", "test_my_model_external_nodes"], expect_pass=True)
+        assert len(results) == 1

From fd2ef09c7599bfe624b6e33f7cbbfcf21fb1a6c0 Mon Sep 17 00:00:00 2001
From: Quigley Malcolm <quigley.malcolm@dbtlabs.com>
Date: Thu, 11 Jan 2024 19:21:12 -0800
Subject: [PATCH 2/6] Abbreviate in names in external nodes test to stay under
 64 character postgres max

Was getting test failures due to resulting lengthy model names being created
by unit test task in the functional test
---
 tests/functional/unit_testing/fixtures.py        |  2 +-
 .../functional/unit_testing/test_unit_testing.py | 16 ++++++++--------
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/tests/functional/unit_testing/fixtures.py b/tests/functional/unit_testing/fixtures.py
index 127f18e1d82..d0ade22aa0a 100644
--- a/tests/functional/unit_testing/fixtures.py
+++ b/tests/functional/unit_testing/fixtures.py
@@ -656,7 +656,7 @@
 SELECT 'gmail.com' AS tld
 """
 
-test_my_model_external_nodes_sql = """
+valid_emails_sql = """
 WITH
 accounts AS (
   SELECT user_id, email, email_top_level_domain
diff --git a/tests/functional/unit_testing/test_unit_testing.py b/tests/functional/unit_testing/test_unit_testing.py
index 89d54ce3e6b..4db264ecf16 100644
--- a/tests/functional/unit_testing/test_unit_testing.py
+++ b/tests/functional/unit_testing/test_unit_testing.py
@@ -19,7 +19,7 @@
     event_sql,
     test_my_model_incremental_yml,
     test_my_model_yml_invalid,
-    test_my_model_external_nodes_sql,
+    valid_emails_sql,
     top_level_domains_sql,
 )
 
@@ -262,10 +262,10 @@ def test_invalid_input_configuration(self, project):
         run_dbt(["test"], expect_pass=False)
 
 
-test_unit_test_with_external_nodes_yml = """
+unit_test_ext_node_yml = """
 unit_tests:
-  - name: test_unit_test_with_external_nodes
-    model: test_my_model_external_nodes
+  - name: unit_test_ext_node
+    model: valid_emails
     given:
       - input: ref('external_package', 'external_model')
         rows:
@@ -300,12 +300,12 @@ def external_model_node(self):
     def models(self):
         return {
             "top_level_domains.sql": top_level_domains_sql,
-            "test_my_model_external_nodes.sql": test_my_model_external_nodes_sql,
-            "test_unit_test_with_external_nodes.yml": test_unit_test_with_external_nodes_yml,
+            "valid_emails.sql": valid_emails_sql,
+            "unit_test_ext_node.yml": unit_test_ext_node_yml,
         }
 
     @mock.patch("dbt.plugins.get_plugin_manager")
-    def test_unit_test_external_nodes(
+    def test_unit_test_ext_nodes(
         self,
         get_plugin_manager,
         project,
@@ -316,5 +316,5 @@ def test_unit_test_external_nodes(
         external_nodes.add_model(external_model_node)
         get_plugin_manager.return_value.get_nodes.return_value = external_nodes
 
-        results = run_dbt(["test", "--select", "test_my_model_external_nodes"], expect_pass=True)
+        results = run_dbt(["test", "--select", "valid_emails"], expect_pass=True)
         assert len(results) == 1

From b9a52c8fcf3a2855a4ffcbce22a3986bc531693b Mon Sep 17 00:00:00 2001
From: Quigley Malcolm <quigley.malcolm@dbtlabs.com>
Date: Fri, 12 Jan 2024 11:37:34 -0600
Subject: [PATCH 3/6] Force package search during unit test ref resolution to
 only look at package of unit test

This is necessary because when we create the shadow manifest for the running
the unit test node, the included models whether direct or external, get rewritten
as being in the unit test's package.
---
 core/dbt/context/providers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/core/dbt/context/providers.py b/core/dbt/context/providers.py
index aafbbfa5f67..2f91ecd30d9 100644
--- a/core/dbt/context/providers.py
+++ b/core/dbt/context/providers.py
@@ -577,7 +577,7 @@ def resolve(
         target_package: Optional[str] = None,
         target_version: Optional[NodeVersion] = None,
     ) -> RelationProxy:
-        return super().resolve(target_name, target_package, target_version)
+        return super().resolve(target_name, self.model.package_name, target_version)
 
 
 # `source` implementations

From 37f541ce517e59c962bc55cd900422ab971714cf Mon Sep 17 00:00:00 2001
From: Quigley Malcolm <quigley.malcolm@dbtlabs.com>
Date: Fri, 12 Jan 2024 12:37:09 -0600
Subject: [PATCH 4/6] Add seed to test of external node unit test, and
 indirectly have the external node point to it

Previously I was getting an error about the columns for the external model
not being fetchable from the database via the macro `get_columns_in_relation`.
By creating a seed for the tests, which creates a table in postgres, we can then
tell the external model that it's database schema and identifier (the relation)
is that table from the seed without make the seed an actual dependency of the
external model in the dag.
---
 .../functional/unit_testing/test_unit_testing.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/tests/functional/unit_testing/test_unit_testing.py b/tests/functional/unit_testing/test_unit_testing.py
index 4db264ecf16..e7e1733de0a 100644
--- a/tests/functional/unit_testing/test_unit_testing.py
+++ b/tests/functional/unit_testing/test_unit_testing.py
@@ -285,17 +285,25 @@ def test_invalid_input_configuration(self, project):
         - {user_id: 4, is_valid_email_address: true}
 """
 
+external_node_seed_csv = """user_id,email,email_top_level_domain
+1,"example@example.com","example.com"
+"""
+
 
 class TestUnitTestExternalNode:
     @pytest.fixture(scope="class")
-    def external_model_node(self):
+    def external_model_node(self, unique_schema):
         return ModelNodeArgs(
             name="external_model",
             package_name="external_package",
-            identifier="test_identifier",
-            schema="test_schema",
+            identifier="external_node_seed",
+            schema=unique_schema,
         )
 
+    @pytest.fixture(scope="class")
+    def seeds(self):
+        return {"external_node_seed.csv": external_node_seed_csv}
+
     @pytest.fixture(scope="class")
     def models(self):
         return {
@@ -316,5 +324,7 @@ def test_unit_test_ext_nodes(
         external_nodes.add_model(external_model_node)
         get_plugin_manager.return_value.get_nodes.return_value = external_nodes
 
+        # `seed` need so a table exists for `external_model` to point to
+        run_dbt(["seed"], expect_pass=True)
         results = run_dbt(["test", "--select", "valid_emails"], expect_pass=True)
         assert len(results) == 1

From e4dcf046b012a8654ac672e8d22185a816c2dd53 Mon Sep 17 00:00:00 2001
From: Quigley Malcolm <quigley.malcolm@dbtlabs.com>
Date: Thu, 18 Jan 2024 12:24:08 -0600
Subject: [PATCH 5/6] Ensure all models in unit test shadow manifest have a non
 `None` path

External nodes generally don't have paths, but in unit tests we write out
all models to sql files (as this allows us to test them). Thus external
nodes need to have their paths set.
---
 core/dbt/parser/unit_tests.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/core/dbt/parser/unit_tests.py b/core/dbt/parser/unit_tests.py
index acfaf32b852..23c98253e18 100644
--- a/core/dbt/parser/unit_tests.py
+++ b/core/dbt/parser/unit_tests.py
@@ -139,7 +139,7 @@ def parse_unit_test_case(self, test_case: UnitTestDefinition):
                     **common_fields,
                     unique_id=f"model.{test_case.package_name}.{input_name}",
                     name=input_name,
-                    path=original_input_node.path,
+                    path=original_input_node.path or f"{input_name}.sql",
                 )
             elif original_input_node.resource_type == NodeType.Source:
                 # We are reusing the database/schema/identifier from the original source,

From 020a9ec950ff12f6a08d4d4f44393997cb7ae546 Mon Sep 17 00:00:00 2001
From: Quigley Malcolm <quigley.malcolm@dbtlabs.com>
Date: Thu, 18 Jan 2024 12:29:11 -0600
Subject: [PATCH 6/6] Add `run` step to function test of unit test with
 external nodes

This is necessary because when executing a unit tests, the columns
associated with a model in the database are retrieved. For this to
be possible, the model must exist in the database, thus we must
run the associated models at least once first.
---
 tests/functional/unit_testing/test_unit_testing.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/functional/unit_testing/test_unit_testing.py b/tests/functional/unit_testing/test_unit_testing.py
index e7e1733de0a..360700a7c33 100644
--- a/tests/functional/unit_testing/test_unit_testing.py
+++ b/tests/functional/unit_testing/test_unit_testing.py
@@ -326,5 +326,7 @@ def test_unit_test_ext_nodes(
 
         # `seed` need so a table exists for `external_model` to point to
         run_dbt(["seed"], expect_pass=True)
+        # `run` needed to ensure `top_level_domains` exists in database for column getting step
+        run_dbt(["run"], expect_pass=True)
         results = run_dbt(["test", "--select", "valid_emails"], expect_pass=True)
         assert len(results) == 1