From c1bfaa848eb00ba5822b6f79f6fd8db6536427d2 Mon Sep 17 00:00:00 2001 From: Quigley Malcolm Date: Tue, 12 Dec 2023 11:18:50 -0800 Subject: [PATCH 1/6] Add unit test that shows unit tests work with external nodes --- tests/functional/unit_testing/fixtures.py | 30 +++++++++ .../unit_testing/test_unit_testing.py | 62 +++++++++++++++++++ 2 files changed, 92 insertions(+) diff --git a/tests/functional/unit_testing/fixtures.py b/tests/functional/unit_testing/fixtures.py index 5c439b92cef..127f18e1d82 100644 --- a/tests/functional/unit_testing/fixtures.py +++ b/tests/functional/unit_testing/fixtures.py @@ -649,3 +649,33 @@ rows: - {c: 3} """ + +top_level_domains_sql = """ +SELECT 'example.com' AS tld +UNION ALL +SELECT 'gmail.com' AS tld +""" + +test_my_model_external_nodes_sql = """ +WITH +accounts AS ( + SELECT user_id, email, email_top_level_domain + FROM {{ ref('external_package', 'external_model')}} +), +top_level_domains AS ( + SELECT tld FROM {{ ref('top_level_domains')}} +), +joined AS ( + SELECT + accounts.user_id as user_id, + top_level_domains.tld as tld + FROM accounts + LEFT OUTER JOIN top_level_domains + ON accounts.email_top_level_domain = top_level_domains.tld +) + +SELECT + joined.user_id as user_id, + CASE WHEN joined.tld IS NULL THEN FALSE ELSE TRUE END AS is_valid_email_address +from joined +""" diff --git a/tests/functional/unit_testing/test_unit_testing.py b/tests/functional/unit_testing/test_unit_testing.py index 2c683d99598..89d54ce3e6b 100644 --- a/tests/functional/unit_testing/test_unit_testing.py +++ b/tests/functional/unit_testing/test_unit_testing.py @@ -1,4 +1,5 @@ import pytest +from unittest import mock from dbt.tests.util import ( run_dbt, write_file, @@ -6,6 +7,7 @@ ) from dbt.contracts.results import NodeStatus from dbt.exceptions import DuplicateResourceNameError, ParsingError +from dbt.plugins.manifest import PluginNodes, ModelNodeArgs from fixtures import ( my_model_sql, my_model_vars_sql, @@ -17,6 +19,8 @@ event_sql, test_my_model_incremental_yml, test_my_model_yml_invalid, + test_my_model_external_nodes_sql, + top_level_domains_sql, ) @@ -256,3 +260,61 @@ def test_invalid_input_configuration(self, project): assert len(results) == 3 run_dbt(["test"], expect_pass=False) + + +test_unit_test_with_external_nodes_yml = """ +unit_tests: + - name: test_unit_test_with_external_nodes + model: test_my_model_external_nodes + given: + - input: ref('external_package', 'external_model') + rows: + - {user_id: 1, email: cool@example.com, email_top_level_domain: example.com} + - {user_id: 2, email: cool@unknown.com, email_top_level_domain: unknown.com} + - {user_id: 3, email: badgmail.com, email_top_level_domain: gmail.com} + - {user_id: 4, email: missingdot@gmailcom, email_top_level_domain: gmail.com} + - input: ref('top_level_domains') + rows: + - {tld: example.com} + - {tld: gmail.com} + expect: + rows: + - {user_id: 1, is_valid_email_address: true} + - {user_id: 2, is_valid_email_address: false} + - {user_id: 3, is_valid_email_address: true} + - {user_id: 4, is_valid_email_address: true} +""" + + +class TestUnitTestExternalNode: + @pytest.fixture(scope="class") + def external_model_node(self): + return ModelNodeArgs( + name="external_model", + package_name="external_package", + identifier="test_identifier", + schema="test_schema", + ) + + @pytest.fixture(scope="class") + def models(self): + return { + "top_level_domains.sql": top_level_domains_sql, + "test_my_model_external_nodes.sql": test_my_model_external_nodes_sql, + "test_unit_test_with_external_nodes.yml": test_unit_test_with_external_nodes_yml, + } + + @mock.patch("dbt.plugins.get_plugin_manager") + def test_unit_test_external_nodes( + self, + get_plugin_manager, + project, + external_model_node, + ): + # initial plugin - one external model + external_nodes = PluginNodes() + external_nodes.add_model(external_model_node) + get_plugin_manager.return_value.get_nodes.return_value = external_nodes + + results = run_dbt(["test", "--select", "test_my_model_external_nodes"], expect_pass=True) + assert len(results) == 1 From fd2ef09c7599bfe624b6e33f7cbbfcf21fb1a6c0 Mon Sep 17 00:00:00 2001 From: Quigley Malcolm Date: Thu, 11 Jan 2024 19:21:12 -0800 Subject: [PATCH 2/6] Abbreviate in names in external nodes test to stay under 64 character postgres max Was getting test failures due to resulting lengthy model names being created by unit test task in the functional test --- tests/functional/unit_testing/fixtures.py | 2 +- .../functional/unit_testing/test_unit_testing.py | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/functional/unit_testing/fixtures.py b/tests/functional/unit_testing/fixtures.py index 127f18e1d82..d0ade22aa0a 100644 --- a/tests/functional/unit_testing/fixtures.py +++ b/tests/functional/unit_testing/fixtures.py @@ -656,7 +656,7 @@ SELECT 'gmail.com' AS tld """ -test_my_model_external_nodes_sql = """ +valid_emails_sql = """ WITH accounts AS ( SELECT user_id, email, email_top_level_domain diff --git a/tests/functional/unit_testing/test_unit_testing.py b/tests/functional/unit_testing/test_unit_testing.py index 89d54ce3e6b..4db264ecf16 100644 --- a/tests/functional/unit_testing/test_unit_testing.py +++ b/tests/functional/unit_testing/test_unit_testing.py @@ -19,7 +19,7 @@ event_sql, test_my_model_incremental_yml, test_my_model_yml_invalid, - test_my_model_external_nodes_sql, + valid_emails_sql, top_level_domains_sql, ) @@ -262,10 +262,10 @@ def test_invalid_input_configuration(self, project): run_dbt(["test"], expect_pass=False) -test_unit_test_with_external_nodes_yml = """ +unit_test_ext_node_yml = """ unit_tests: - - name: test_unit_test_with_external_nodes - model: test_my_model_external_nodes + - name: unit_test_ext_node + model: valid_emails given: - input: ref('external_package', 'external_model') rows: @@ -300,12 +300,12 @@ def external_model_node(self): def models(self): return { "top_level_domains.sql": top_level_domains_sql, - "test_my_model_external_nodes.sql": test_my_model_external_nodes_sql, - "test_unit_test_with_external_nodes.yml": test_unit_test_with_external_nodes_yml, + "valid_emails.sql": valid_emails_sql, + "unit_test_ext_node.yml": unit_test_ext_node_yml, } @mock.patch("dbt.plugins.get_plugin_manager") - def test_unit_test_external_nodes( + def test_unit_test_ext_nodes( self, get_plugin_manager, project, @@ -316,5 +316,5 @@ def test_unit_test_external_nodes( external_nodes.add_model(external_model_node) get_plugin_manager.return_value.get_nodes.return_value = external_nodes - results = run_dbt(["test", "--select", "test_my_model_external_nodes"], expect_pass=True) + results = run_dbt(["test", "--select", "valid_emails"], expect_pass=True) assert len(results) == 1 From b9a52c8fcf3a2855a4ffcbce22a3986bc531693b Mon Sep 17 00:00:00 2001 From: Quigley Malcolm Date: Fri, 12 Jan 2024 11:37:34 -0600 Subject: [PATCH 3/6] Force package search during unit test ref resolution to only look at package of unit test This is necessary because when we create the shadow manifest for the running the unit test node, the included models whether direct or external, get rewritten as being in the unit test's package. --- core/dbt/context/providers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/dbt/context/providers.py b/core/dbt/context/providers.py index aafbbfa5f67..2f91ecd30d9 100644 --- a/core/dbt/context/providers.py +++ b/core/dbt/context/providers.py @@ -577,7 +577,7 @@ def resolve( target_package: Optional[str] = None, target_version: Optional[NodeVersion] = None, ) -> RelationProxy: - return super().resolve(target_name, target_package, target_version) + return super().resolve(target_name, self.model.package_name, target_version) # `source` implementations From 37f541ce517e59c962bc55cd900422ab971714cf Mon Sep 17 00:00:00 2001 From: Quigley Malcolm Date: Fri, 12 Jan 2024 12:37:09 -0600 Subject: [PATCH 4/6] Add seed to test of external node unit test, and indirectly have the external node point to it Previously I was getting an error about the columns for the external model not being fetchable from the database via the macro `get_columns_in_relation`. By creating a seed for the tests, which creates a table in postgres, we can then tell the external model that it's database schema and identifier (the relation) is that table from the seed without make the seed an actual dependency of the external model in the dag. --- .../functional/unit_testing/test_unit_testing.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/tests/functional/unit_testing/test_unit_testing.py b/tests/functional/unit_testing/test_unit_testing.py index 4db264ecf16..e7e1733de0a 100644 --- a/tests/functional/unit_testing/test_unit_testing.py +++ b/tests/functional/unit_testing/test_unit_testing.py @@ -285,17 +285,25 @@ def test_invalid_input_configuration(self, project): - {user_id: 4, is_valid_email_address: true} """ +external_node_seed_csv = """user_id,email,email_top_level_domain +1,"example@example.com","example.com" +""" + class TestUnitTestExternalNode: @pytest.fixture(scope="class") - def external_model_node(self): + def external_model_node(self, unique_schema): return ModelNodeArgs( name="external_model", package_name="external_package", - identifier="test_identifier", - schema="test_schema", + identifier="external_node_seed", + schema=unique_schema, ) + @pytest.fixture(scope="class") + def seeds(self): + return {"external_node_seed.csv": external_node_seed_csv} + @pytest.fixture(scope="class") def models(self): return { @@ -316,5 +324,7 @@ def test_unit_test_ext_nodes( external_nodes.add_model(external_model_node) get_plugin_manager.return_value.get_nodes.return_value = external_nodes + # `seed` need so a table exists for `external_model` to point to + run_dbt(["seed"], expect_pass=True) results = run_dbt(["test", "--select", "valid_emails"], expect_pass=True) assert len(results) == 1 From e4dcf046b012a8654ac672e8d22185a816c2dd53 Mon Sep 17 00:00:00 2001 From: Quigley Malcolm Date: Thu, 18 Jan 2024 12:24:08 -0600 Subject: [PATCH 5/6] Ensure all models in unit test shadow manifest have a non `None` path External nodes generally don't have paths, but in unit tests we write out all models to sql files (as this allows us to test them). Thus external nodes need to have their paths set. --- core/dbt/parser/unit_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/dbt/parser/unit_tests.py b/core/dbt/parser/unit_tests.py index acfaf32b852..23c98253e18 100644 --- a/core/dbt/parser/unit_tests.py +++ b/core/dbt/parser/unit_tests.py @@ -139,7 +139,7 @@ def parse_unit_test_case(self, test_case: UnitTestDefinition): **common_fields, unique_id=f"model.{test_case.package_name}.{input_name}", name=input_name, - path=original_input_node.path, + path=original_input_node.path or f"{input_name}.sql", ) elif original_input_node.resource_type == NodeType.Source: # We are reusing the database/schema/identifier from the original source, From 020a9ec950ff12f6a08d4d4f44393997cb7ae546 Mon Sep 17 00:00:00 2001 From: Quigley Malcolm Date: Thu, 18 Jan 2024 12:29:11 -0600 Subject: [PATCH 6/6] Add `run` step to function test of unit test with external nodes This is necessary because when executing a unit tests, the columns associated with a model in the database are retrieved. For this to be possible, the model must exist in the database, thus we must run the associated models at least once first. --- tests/functional/unit_testing/test_unit_testing.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/functional/unit_testing/test_unit_testing.py b/tests/functional/unit_testing/test_unit_testing.py index e7e1733de0a..360700a7c33 100644 --- a/tests/functional/unit_testing/test_unit_testing.py +++ b/tests/functional/unit_testing/test_unit_testing.py @@ -326,5 +326,7 @@ def test_unit_test_ext_nodes( # `seed` need so a table exists for `external_model` to point to run_dbt(["seed"], expect_pass=True) + # `run` needed to ensure `top_level_domains` exists in database for column getting step + run_dbt(["run"], expect_pass=True) results = run_dbt(["test", "--select", "valid_emails"], expect_pass=True) assert len(results) == 1