From 3d76342c761f0340573afe665f2a84a0338e3e0d Mon Sep 17 00:00:00 2001 From: Quigley Malcolm Date: Tue, 12 Dec 2023 11:18:50 -0800 Subject: [PATCH 01/10] Add unit test that shows unit tests work with external nodes --- tests/functional/unit_testing/fixtures.py | 30 +++++++++ .../unit_testing/test_unit_testing.py | 62 +++++++++++++++++++ 2 files changed, 92 insertions(+) diff --git a/tests/functional/unit_testing/fixtures.py b/tests/functional/unit_testing/fixtures.py index 5c439b92cef..127f18e1d82 100644 --- a/tests/functional/unit_testing/fixtures.py +++ b/tests/functional/unit_testing/fixtures.py @@ -649,3 +649,33 @@ rows: - {c: 3} """ + +top_level_domains_sql = """ +SELECT 'example.com' AS tld +UNION ALL +SELECT 'gmail.com' AS tld +""" + +test_my_model_external_nodes_sql = """ +WITH +accounts AS ( + SELECT user_id, email, email_top_level_domain + FROM {{ ref('external_package', 'external_model')}} +), +top_level_domains AS ( + SELECT tld FROM {{ ref('top_level_domains')}} +), +joined AS ( + SELECT + accounts.user_id as user_id, + top_level_domains.tld as tld + FROM accounts + LEFT OUTER JOIN top_level_domains + ON accounts.email_top_level_domain = top_level_domains.tld +) + +SELECT + joined.user_id as user_id, + CASE WHEN joined.tld IS NULL THEN FALSE ELSE TRUE END AS is_valid_email_address +from joined +""" diff --git a/tests/functional/unit_testing/test_unit_testing.py b/tests/functional/unit_testing/test_unit_testing.py index 2c683d99598..89d54ce3e6b 100644 --- a/tests/functional/unit_testing/test_unit_testing.py +++ b/tests/functional/unit_testing/test_unit_testing.py @@ -1,4 +1,5 @@ import pytest +from unittest import mock from dbt.tests.util import ( run_dbt, write_file, @@ -6,6 +7,7 @@ ) from dbt.contracts.results import NodeStatus from dbt.exceptions import DuplicateResourceNameError, ParsingError +from dbt.plugins.manifest import PluginNodes, ModelNodeArgs from fixtures import ( my_model_sql, my_model_vars_sql, @@ -17,6 +19,8 @@ event_sql, test_my_model_incremental_yml, test_my_model_yml_invalid, + test_my_model_external_nodes_sql, + top_level_domains_sql, ) @@ -256,3 +260,61 @@ def test_invalid_input_configuration(self, project): assert len(results) == 3 run_dbt(["test"], expect_pass=False) + + +test_unit_test_with_external_nodes_yml = """ +unit_tests: + - name: test_unit_test_with_external_nodes + model: test_my_model_external_nodes + given: + - input: ref('external_package', 'external_model') + rows: + - {user_id: 1, email: cool@example.com, email_top_level_domain: example.com} + - {user_id: 2, email: cool@unknown.com, email_top_level_domain: unknown.com} + - {user_id: 3, email: badgmail.com, email_top_level_domain: gmail.com} + - {user_id: 4, email: missingdot@gmailcom, email_top_level_domain: gmail.com} + - input: ref('top_level_domains') + rows: + - {tld: example.com} + - {tld: gmail.com} + expect: + rows: + - {user_id: 1, is_valid_email_address: true} + - {user_id: 2, is_valid_email_address: false} + - {user_id: 3, is_valid_email_address: true} + - {user_id: 4, is_valid_email_address: true} +""" + + +class TestUnitTestExternalNode: + @pytest.fixture(scope="class") + def external_model_node(self): + return ModelNodeArgs( + name="external_model", + package_name="external_package", + identifier="test_identifier", + schema="test_schema", + ) + + @pytest.fixture(scope="class") + def models(self): + return { + "top_level_domains.sql": top_level_domains_sql, + "test_my_model_external_nodes.sql": test_my_model_external_nodes_sql, + "test_unit_test_with_external_nodes.yml": test_unit_test_with_external_nodes_yml, + } + + @mock.patch("dbt.plugins.get_plugin_manager") + def test_unit_test_external_nodes( + self, + get_plugin_manager, + project, + external_model_node, + ): + # initial plugin - one external model + external_nodes = PluginNodes() + external_nodes.add_model(external_model_node) + get_plugin_manager.return_value.get_nodes.return_value = external_nodes + + results = run_dbt(["test", "--select", "test_my_model_external_nodes"], expect_pass=True) + assert len(results) == 1 From ded39d430cbcb98a30c3683f4579e2a7ea7a7538 Mon Sep 17 00:00:00 2001 From: Quigley Malcolm Date: Thu, 11 Jan 2024 19:21:12 -0800 Subject: [PATCH 02/10] Abbreviate in names in external nodes test to stay under 64 character postgres max Was getting test failures due to resulting lengthy model names being created by unit test task in the functional test --- tests/functional/unit_testing/fixtures.py | 2 +- .../functional/unit_testing/test_unit_testing.py | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/functional/unit_testing/fixtures.py b/tests/functional/unit_testing/fixtures.py index 127f18e1d82..d0ade22aa0a 100644 --- a/tests/functional/unit_testing/fixtures.py +++ b/tests/functional/unit_testing/fixtures.py @@ -656,7 +656,7 @@ SELECT 'gmail.com' AS tld """ -test_my_model_external_nodes_sql = """ +valid_emails_sql = """ WITH accounts AS ( SELECT user_id, email, email_top_level_domain diff --git a/tests/functional/unit_testing/test_unit_testing.py b/tests/functional/unit_testing/test_unit_testing.py index 89d54ce3e6b..4db264ecf16 100644 --- a/tests/functional/unit_testing/test_unit_testing.py +++ b/tests/functional/unit_testing/test_unit_testing.py @@ -19,7 +19,7 @@ event_sql, test_my_model_incremental_yml, test_my_model_yml_invalid, - test_my_model_external_nodes_sql, + valid_emails_sql, top_level_domains_sql, ) @@ -262,10 +262,10 @@ def test_invalid_input_configuration(self, project): run_dbt(["test"], expect_pass=False) -test_unit_test_with_external_nodes_yml = """ +unit_test_ext_node_yml = """ unit_tests: - - name: test_unit_test_with_external_nodes - model: test_my_model_external_nodes + - name: unit_test_ext_node + model: valid_emails given: - input: ref('external_package', 'external_model') rows: @@ -300,12 +300,12 @@ def external_model_node(self): def models(self): return { "top_level_domains.sql": top_level_domains_sql, - "test_my_model_external_nodes.sql": test_my_model_external_nodes_sql, - "test_unit_test_with_external_nodes.yml": test_unit_test_with_external_nodes_yml, + "valid_emails.sql": valid_emails_sql, + "unit_test_ext_node.yml": unit_test_ext_node_yml, } @mock.patch("dbt.plugins.get_plugin_manager") - def test_unit_test_external_nodes( + def test_unit_test_ext_nodes( self, get_plugin_manager, project, @@ -316,5 +316,5 @@ def test_unit_test_external_nodes( external_nodes.add_model(external_model_node) get_plugin_manager.return_value.get_nodes.return_value = external_nodes - results = run_dbt(["test", "--select", "test_my_model_external_nodes"], expect_pass=True) + results = run_dbt(["test", "--select", "valid_emails"], expect_pass=True) assert len(results) == 1 From c5b307a33a4c641a47a15343e725cd0a4f350dd9 Mon Sep 17 00:00:00 2001 From: Quigley Malcolm Date: Fri, 12 Jan 2024 12:56:21 -0600 Subject: [PATCH 03/10] Fix unit test parsing to ensure external nodes continue to keep their package name --- core/dbt/parser/unit_tests.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/core/dbt/parser/unit_tests.py b/core/dbt/parser/unit_tests.py index acfaf32b852..432b23e762b 100644 --- a/core/dbt/parser/unit_tests.py +++ b/core/dbt/parser/unit_tests.py @@ -118,7 +118,6 @@ def parse_unit_test_case(self, test_case: UnitTestDefinition): common_fields = { "resource_type": NodeType.Model, - "package_name": test_case.package_name, "original_file_path": original_input_node.original_file_path, "config": ModelConfig(materialized="ephemeral"), "database": original_input_node.database, @@ -137,7 +136,8 @@ def parse_unit_test_case(self, test_case: UnitTestDefinition): input_name = original_input_node.name input_node = ModelNode( **common_fields, - unique_id=f"model.{test_case.package_name}.{input_name}", + package_name=original_input_node.package_name, + unique_id=f"model.{original_input_node.package_name}.{input_name}", name=input_name, path=original_input_node.path, ) @@ -148,7 +148,8 @@ def parse_unit_test_case(self, test_case: UnitTestDefinition): input_name = original_input_node.name input_node = UnitTestSourceDefinition( **common_fields, - unique_id=f"model.{test_case.package_name}.{input_name}", + package_name=original_input_node.package_name, + unique_id=f"model.{original_input_node.package_name}.{input_name}", name=original_input_node.name, # must be the same name for source lookup to work path=input_name + ".sql", # for writing out compiled_code source_name=original_input_node.source_name, # needed for source lookup From 31af34bc3f9e6231f211e6b620b70f8519d0e1bf Mon Sep 17 00:00:00 2001 From: Quigley Malcolm Date: Fri, 12 Jan 2024 12:37:09 -0600 Subject: [PATCH 04/10] Add seed to test of external node unit test, and indirectly have the external node point to it Previously I was getting an error about the columns for the external model not being fetchable from the database via the macro `get_columns_in_relation`. By creating a seed for the tests, which creates a table in postgres, we can then tell the external model that it's database schema and identifier (the relation) is that table from the seed without make the seed an actual dependency of the external model in the dag. --- .../functional/unit_testing/test_unit_testing.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/tests/functional/unit_testing/test_unit_testing.py b/tests/functional/unit_testing/test_unit_testing.py index 4db264ecf16..e7e1733de0a 100644 --- a/tests/functional/unit_testing/test_unit_testing.py +++ b/tests/functional/unit_testing/test_unit_testing.py @@ -285,17 +285,25 @@ def test_invalid_input_configuration(self, project): - {user_id: 4, is_valid_email_address: true} """ +external_node_seed_csv = """user_id,email,email_top_level_domain +1,"example@example.com","example.com" +""" + class TestUnitTestExternalNode: @pytest.fixture(scope="class") - def external_model_node(self): + def external_model_node(self, unique_schema): return ModelNodeArgs( name="external_model", package_name="external_package", - identifier="test_identifier", - schema="test_schema", + identifier="external_node_seed", + schema=unique_schema, ) + @pytest.fixture(scope="class") + def seeds(self): + return {"external_node_seed.csv": external_node_seed_csv} + @pytest.fixture(scope="class") def models(self): return { @@ -316,5 +324,7 @@ def test_unit_test_ext_nodes( external_nodes.add_model(external_model_node) get_plugin_manager.return_value.get_nodes.return_value = external_nodes + # `seed` need so a table exists for `external_model` to point to + run_dbt(["seed"], expect_pass=True) results = run_dbt(["test", "--select", "valid_emails"], expect_pass=True) assert len(results) == 1 From 3584e6e49735a04035fdaf5a488fd4a0e3bb114a Mon Sep 17 00:00:00 2001 From: Quigley Malcolm Date: Thu, 18 Jan 2024 12:24:08 -0600 Subject: [PATCH 05/10] Ensure all models in unit test shadow manifest have a non `None` path External nodes generally don't have paths, but in unit tests we write out all models to sql files (as this allows us to test them). Thus external nodes need to have their paths set. --- core/dbt/parser/unit_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/dbt/parser/unit_tests.py b/core/dbt/parser/unit_tests.py index 432b23e762b..aebabb81b88 100644 --- a/core/dbt/parser/unit_tests.py +++ b/core/dbt/parser/unit_tests.py @@ -139,7 +139,7 @@ def parse_unit_test_case(self, test_case: UnitTestDefinition): package_name=original_input_node.package_name, unique_id=f"model.{original_input_node.package_name}.{input_name}", name=input_name, - path=original_input_node.path, + path=original_input_node.path or f"{input_name}.sql", ) elif original_input_node.resource_type == NodeType.Source: # We are reusing the database/schema/identifier from the original source, From b57a7f7d34c67571c108a0ad52c01f258f258da7 Mon Sep 17 00:00:00 2001 From: Quigley Malcolm Date: Thu, 18 Jan 2024 12:29:11 -0600 Subject: [PATCH 06/10] Add `run` step to function test of unit test with external nodes This is necessary because when executing a unit tests, the columns associated with a model in the database are retrieved. For this to be possible, the model must exist in the database, thus we must run the associated models at least once first. --- tests/functional/unit_testing/test_unit_testing.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/functional/unit_testing/test_unit_testing.py b/tests/functional/unit_testing/test_unit_testing.py index e7e1733de0a..360700a7c33 100644 --- a/tests/functional/unit_testing/test_unit_testing.py +++ b/tests/functional/unit_testing/test_unit_testing.py @@ -326,5 +326,7 @@ def test_unit_test_ext_nodes( # `seed` need so a table exists for `external_model` to point to run_dbt(["seed"], expect_pass=True) + # `run` needed to ensure `top_level_domains` exists in database for column getting step + run_dbt(["run"], expect_pass=True) results = run_dbt(["test", "--select", "valid_emails"], expect_pass=True) assert len(results) == 1 From adeb3247af8a169644c812c4ce3218d3bfb4d3ae Mon Sep 17 00:00:00 2001 From: Quigley Malcolm Date: Thu, 18 Jan 2024 16:49:37 -0600 Subject: [PATCH 07/10] Create a full external package for function test of a unit test with an external node Previously we were only pseudo creating an external package for testing how unit tests work with external nodes. This was problematic because the package didn't actually exist and thus wasn't seen as accessible when running through dag dependencies. By actually creating the external package, we ensure that all the built in normal processes happen. --- tests/functional/unit_testing/fixtures.py | 41 +++++++++++++++++++ .../unit_testing/test_unit_testing.py | 35 +++++----------- 2 files changed, 51 insertions(+), 25 deletions(-) diff --git a/tests/functional/unit_testing/fixtures.py b/tests/functional/unit_testing/fixtures.py index d0ade22aa0a..345ea46cae4 100644 --- a/tests/functional/unit_testing/fixtures.py +++ b/tests/functional/unit_testing/fixtures.py @@ -1,3 +1,5 @@ +import pytest + my_model_vars_sql = """ SELECT a+b as c, @@ -679,3 +681,42 @@ CASE WHEN joined.tld IS NULL THEN FALSE ELSE TRUE END AS is_valid_email_address from joined """ + +external_package__accounts_seed_csv = """user_id,email,email_top_level_domain +1,"example@example.com","example.com" +""" + +external_package__external_model_sql = """ +SELECT user_id, email, email_top_level_domain FROM {{ ref('accounts_seed') }} +""" + + +external_package_project_yml = """ +name: external_package +version: '1.0' +config-version: 2 + +model-paths: ["models"] # paths to models +analysis-paths: ["analyses"] # path with analysis files which are compiled, but not run +target-path: "target" # path for compiled code +clean-targets: ["target"] # directories removed by the clean task +test-paths: ["tests"] # where to store test results +seed-paths: ["seeds"] # load CSVs from this directory with `dbt seed` +macro-paths: ["macros"] # where to find macros + +profile: user + +models: + external_package: +""" + + +@pytest.fixture(scope="class") +def external_package(): + return { + "dbt_project.yml": external_package_project_yml, + "seeds": {"accounts_seed.csv": external_package__accounts_seed_csv}, + "models": { + "external_model.sql": external_package__external_model_sql, + }, + } diff --git a/tests/functional/unit_testing/test_unit_testing.py b/tests/functional/unit_testing/test_unit_testing.py index 360700a7c33..90cae2ad878 100644 --- a/tests/functional/unit_testing/test_unit_testing.py +++ b/tests/functional/unit_testing/test_unit_testing.py @@ -1,5 +1,4 @@ import pytest -from unittest import mock from dbt.tests.util import ( run_dbt, write_file, @@ -7,8 +6,8 @@ ) from dbt.contracts.results import NodeStatus from dbt.exceptions import DuplicateResourceNameError, ParsingError -from dbt.plugins.manifest import PluginNodes, ModelNodeArgs -from fixtures import ( +from dbt.tests.fixtures.project import write_project_files +from fixtures import ( # noqa: F401 my_model_sql, my_model_vars_sql, my_model_a_sql, @@ -21,6 +20,7 @@ test_my_model_yml_invalid, valid_emails_sql, top_level_domains_sql, + external_package, ) @@ -285,24 +285,15 @@ def test_invalid_input_configuration(self, project): - {user_id: 4, is_valid_email_address: true} """ -external_node_seed_csv = """user_id,email,email_top_level_domain -1,"example@example.com","example.com" -""" - class TestUnitTestExternalNode: - @pytest.fixture(scope="class") - def external_model_node(self, unique_schema): - return ModelNodeArgs( - name="external_model", - package_name="external_package", - identifier="external_node_seed", - schema=unique_schema, - ) + @pytest.fixture(scope="class", autouse=True) + def setUp(self, project_root, external_package): # noqa: F811 + write_project_files(project_root, "external_package", external_package) @pytest.fixture(scope="class") - def seeds(self): - return {"external_node_seed.csv": external_node_seed_csv} + def packages(self): + return {"packages": [{"local": "external_package"}]} @pytest.fixture(scope="class") def models(self): @@ -312,18 +303,12 @@ def models(self): "unit_test_ext_node.yml": unit_test_ext_node_yml, } - @mock.patch("dbt.plugins.get_plugin_manager") def test_unit_test_ext_nodes( self, - get_plugin_manager, project, - external_model_node, ): - # initial plugin - one external model - external_nodes = PluginNodes() - external_nodes.add_model(external_model_node) - get_plugin_manager.return_value.get_nodes.return_value = external_nodes - + # `deps` to install the external package + run_dbt(["deps"], expect_pass=True) # `seed` need so a table exists for `external_model` to point to run_dbt(["seed"], expect_pass=True) # `run` needed to ensure `top_level_domains` exists in database for column getting step From 45e33b4b2031f9def15c967af87f12c529be5f78 Mon Sep 17 00:00:00 2001 From: Quigley Malcolm Date: Fri, 19 Jan 2024 09:55:16 -0600 Subject: [PATCH 08/10] Add test for more ephemoral external models --- .../unit_testing/test_unit_testing.py | 47 ++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/tests/functional/unit_testing/test_unit_testing.py b/tests/functional/unit_testing/test_unit_testing.py index 90cae2ad878..890af69a115 100644 --- a/tests/functional/unit_testing/test_unit_testing.py +++ b/tests/functional/unit_testing/test_unit_testing.py @@ -1,4 +1,5 @@ import pytest +from unittest import mock from dbt.tests.util import ( run_dbt, write_file, @@ -6,6 +7,7 @@ ) from dbt.contracts.results import NodeStatus from dbt.exceptions import DuplicateResourceNameError, ParsingError +from dbt.plugins.manifest import PluginNodes, ModelNodeArgs from dbt.tests.fixtures.project import write_project_files from fixtures import ( # noqa: F401 my_model_sql, @@ -20,6 +22,7 @@ test_my_model_yml_invalid, valid_emails_sql, top_level_domains_sql, + external_package__accounts_seed_csv, external_package, ) @@ -286,7 +289,7 @@ def test_invalid_input_configuration(self, project): """ -class TestUnitTestExternalNode: +class TestUnitTestExternalPackageNode: @pytest.fixture(scope="class", autouse=True) def setUp(self, project_root, external_package): # noqa: F811 write_project_files(project_root, "external_package", external_package) @@ -315,3 +318,45 @@ def test_unit_test_ext_nodes( run_dbt(["run"], expect_pass=True) results = run_dbt(["test", "--select", "valid_emails"], expect_pass=True) assert len(results) == 1 + + +class TestUnitTestExternalProjectNode: + @pytest.fixture(scope="class") + def external_model_node(self, unique_schema): + return ModelNodeArgs( + name="external_model", + package_name="external_package", + identifier="external_node_seed", + schema=unique_schema, + ) + + @pytest.fixture(scope="class") + def seeds(self): + return {"external_node_seed.csv": external_package__accounts_seed_csv} + + @pytest.fixture(scope="class") + def models(self): + return { + "top_level_domains.sql": top_level_domains_sql, + "valid_emails.sql": valid_emails_sql, + "unit_test_ext_node.yml": unit_test_ext_node_yml, + } + + @mock.patch("dbt.plugins.get_plugin_manager") + def test_unit_test_ext_nodes( + self, + get_plugin_manager, + project, + external_model_node, + ): + # initial plugin - one external model + external_nodes = PluginNodes() + external_nodes.add_model(external_model_node) + get_plugin_manager.return_value.get_nodes.return_value = external_nodes + + # `seed` need so a table exists for `external_model` to point to + run_dbt(["seed"], expect_pass=True) + # `run` needed to ensure `top_level_domains` exists in database for column getting step + run_dbt(["run"], expect_pass=True) + results = run_dbt(["test", "--select", "valid_emails"], expect_pass=True) + assert len(results) == 1 From 9a13f79527b20ff53e3b301f73d447e0e035410c Mon Sep 17 00:00:00 2001 From: Quigley Malcolm Date: Fri, 19 Jan 2024 09:57:15 -0600 Subject: [PATCH 09/10] Flip logic in `packages_for_node` to remove error case By flipping the logic from `not in` to `in` we can drop the exception and instead default to the model runtime config when the package isn't found. We're still trying to grok if there will be any fallout from this. The tests all pass, but that doesn't guarantee nothing bad will happen. --- core/dbt/context/providers.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/core/dbt/context/providers.py b/core/dbt/context/providers.py index aafbbfa5f67..fbb64da5553 100644 --- a/core/dbt/context/providers.py +++ b/core/dbt/context/providers.py @@ -67,7 +67,6 @@ MacroResultAlreadyLoadedError, MetricArgsError, OperationsCannotRefEphemeralNodesError, - PackageNotInDepsError, ParsingError, RefBadContextError, RefArgsError, @@ -675,10 +674,8 @@ def packages_for_node(self) -> Iterable[Project]: package_name = self._node.package_name if package_name != self._config.project_name: - if package_name not in dependencies: - # I don't think this is actually reachable - raise PackageNotInDepsError(package_name, node=self._node) - yield dependencies[package_name] + if package_name in dependencies: + yield dependencies[package_name] yield self._config def _generate_merged(self) -> Mapping[str, Any]: From 18abef2aac74ff364f3a101c9b57953441d390e5 Mon Sep 17 00:00:00 2001 From: Quigley Malcolm Date: Fri, 19 Jan 2024 10:14:05 -0600 Subject: [PATCH 10/10] Add changie doc for added support of external nodes in unit tests --- .changes/unreleased/Features-20240119-101335.yaml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .changes/unreleased/Features-20240119-101335.yaml diff --git a/.changes/unreleased/Features-20240119-101335.yaml b/.changes/unreleased/Features-20240119-101335.yaml new file mode 100644 index 00000000000..0dcc711797c --- /dev/null +++ b/.changes/unreleased/Features-20240119-101335.yaml @@ -0,0 +1,6 @@ +kind: Features +body: Added support for external nodes in unit test nodes +time: 2024-01-19T10:13:35.589099-06:00 +custom: + Author: QMalcolm MichelleArk + Issue: "8944"