Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ensure unit test models can depend on external nodes #9343

Merged
Merged
6 changes: 6 additions & 0 deletions .changes/unreleased/Features-20240119-101335.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Features
body: Added support for external nodes in unit test nodes
time: 2024-01-19T10:13:35.589099-06:00
custom:
Author: QMalcolm MichelleArk
Issue: "8944"
7 changes: 2 additions & 5 deletions core/dbt/context/providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,6 @@
MacroResultAlreadyLoadedError,
MetricArgsError,
OperationsCannotRefEphemeralNodesError,
PackageNotInDepsError,
ParsingError,
RefBadContextError,
RefArgsError,
Expand Down Expand Up @@ -675,10 +674,8 @@ def packages_for_node(self) -> Iterable[Project]:
package_name = self._node.package_name

if package_name != self._config.project_name:
if package_name not in dependencies:
# I don't think this is actually reachable
raise PackageNotInDepsError(package_name, node=self._node)
yield dependencies[package_name]
if package_name in dependencies:
yield dependencies[package_name]
Copy link
Contributor

@MichelleArk MichelleArk Jan 22, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For posterity, thinking out loud about the implications of this change:

  • This method is used to load the vars available for a particular jinja (model-scoped) context via the vars macro.
  • This method falls back on the running project's vars if the node has no associated packages. If it returns the wrong package -- this could lead to a 'no var found' issue at runtime.
  • Before external models, it was not possible to have a model node exist in the project without a corresponding package, which is probably why this code path was not reachable
  • With external nodes, this is totally possible now -- so explicitly supporting both paths feels totally reasonable, and correct!

yield self._config

def _generate_merged(self) -> Mapping[str, Any]:
Expand Down
9 changes: 5 additions & 4 deletions core/dbt/parser/unit_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,6 @@ def parse_unit_test_case(self, test_case: UnitTestDefinition):

common_fields = {
"resource_type": NodeType.Model,
"package_name": test_case.package_name,
"original_file_path": original_input_node.original_file_path,
"config": ModelConfig(materialized="ephemeral"),
"database": original_input_node.database,
Expand All @@ -137,9 +136,10 @@ def parse_unit_test_case(self, test_case: UnitTestDefinition):
input_name = original_input_node.name
input_node = ModelNode(
**common_fields,
unique_id=f"model.{test_case.package_name}.{input_name}",
package_name=original_input_node.package_name,
unique_id=f"model.{original_input_node.package_name}.{input_name}",
name=input_name,
path=original_input_node.path,
path=original_input_node.path or f"{input_name}.sql",
)
elif original_input_node.resource_type == NodeType.Source:
# We are reusing the database/schema/identifier from the original source,
Expand All @@ -148,7 +148,8 @@ def parse_unit_test_case(self, test_case: UnitTestDefinition):
input_name = original_input_node.name
input_node = UnitTestSourceDefinition(
**common_fields,
unique_id=f"model.{test_case.package_name}.{input_name}",
package_name=original_input_node.package_name,
unique_id=f"model.{original_input_node.package_name}.{input_name}",
name=original_input_node.name, # must be the same name for source lookup to work
path=input_name + ".sql", # for writing out compiled_code
source_name=original_input_node.source_name, # needed for source lookup
Expand Down
71 changes: 71 additions & 0 deletions tests/functional/unit_testing/fixtures.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import pytest

my_model_vars_sql = """
SELECT
a+b as c,
Expand Down Expand Up @@ -649,3 +651,72 @@
rows:
- {c: 3}
"""

top_level_domains_sql = """
SELECT 'example.com' AS tld
UNION ALL
SELECT 'gmail.com' AS tld
"""

valid_emails_sql = """
WITH
accounts AS (
SELECT user_id, email, email_top_level_domain
FROM {{ ref('external_package', 'external_model')}}
),
top_level_domains AS (
SELECT tld FROM {{ ref('top_level_domains')}}
),
joined AS (
SELECT
accounts.user_id as user_id,
top_level_domains.tld as tld
FROM accounts
LEFT OUTER JOIN top_level_domains
ON accounts.email_top_level_domain = top_level_domains.tld
)

SELECT
joined.user_id as user_id,
CASE WHEN joined.tld IS NULL THEN FALSE ELSE TRUE END AS is_valid_email_address
from joined
"""

external_package__accounts_seed_csv = """user_id,email,email_top_level_domain
1,"example@example.com","example.com"
"""

external_package__external_model_sql = """
SELECT user_id, email, email_top_level_domain FROM {{ ref('accounts_seed') }}
"""


external_package_project_yml = """
name: external_package
version: '1.0'
config-version: 2

model-paths: ["models"] # paths to models
analysis-paths: ["analyses"] # path with analysis files which are compiled, but not run
target-path: "target" # path for compiled code
clean-targets: ["target"] # directories removed by the clean task
test-paths: ["tests"] # where to store test results
seed-paths: ["seeds"] # load CSVs from this directory with `dbt seed`
macro-paths: ["macros"] # where to find macros

profile: user

models:
external_package:
"""


@pytest.fixture(scope="class")
def external_package():
return {
"dbt_project.yml": external_package_project_yml,
"seeds": {"accounts_seed.csv": external_package__accounts_seed_csv},
"models": {
"external_model.sql": external_package__external_model_sql,
},
}
106 changes: 105 additions & 1 deletion tests/functional/unit_testing/test_unit_testing.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
import pytest
from unittest import mock
from dbt.tests.util import (
run_dbt,
write_file,
get_manifest,
)
from dbt.contracts.results import NodeStatus
from dbt.exceptions import DuplicateResourceNameError, ParsingError
from fixtures import (
from dbt.plugins.manifest import PluginNodes, ModelNodeArgs
from dbt.tests.fixtures.project import write_project_files
from fixtures import ( # noqa: F401
my_model_sql,
my_model_vars_sql,
my_model_a_sql,
Expand All @@ -17,6 +20,10 @@
event_sql,
test_my_model_incremental_yml,
test_my_model_yml_invalid,
valid_emails_sql,
top_level_domains_sql,
external_package__accounts_seed_csv,
external_package,
)


Expand Down Expand Up @@ -256,3 +263,100 @@ def test_invalid_input_configuration(self, project):
assert len(results) == 3

run_dbt(["test"], expect_pass=False)


unit_test_ext_node_yml = """
unit_tests:
- name: unit_test_ext_node
model: valid_emails
given:
- input: ref('external_package', 'external_model')
rows:
- {user_id: 1, email: cool@example.com, email_top_level_domain: example.com}
- {user_id: 2, email: cool@unknown.com, email_top_level_domain: unknown.com}
- {user_id: 3, email: badgmail.com, email_top_level_domain: gmail.com}
- {user_id: 4, email: missingdot@gmailcom, email_top_level_domain: gmail.com}
- input: ref('top_level_domains')
rows:
- {tld: example.com}
- {tld: gmail.com}
expect:
rows:
- {user_id: 1, is_valid_email_address: true}
- {user_id: 2, is_valid_email_address: false}
- {user_id: 3, is_valid_email_address: true}
- {user_id: 4, is_valid_email_address: true}
"""


class TestUnitTestExternalPackageNode:
@pytest.fixture(scope="class", autouse=True)
def setUp(self, project_root, external_package): # noqa: F811
write_project_files(project_root, "external_package", external_package)

@pytest.fixture(scope="class")
def packages(self):
return {"packages": [{"local": "external_package"}]}

@pytest.fixture(scope="class")
def models(self):
return {
"top_level_domains.sql": top_level_domains_sql,
"valid_emails.sql": valid_emails_sql,
"unit_test_ext_node.yml": unit_test_ext_node_yml,
}

def test_unit_test_ext_nodes(
self,
project,
):
# `deps` to install the external package
run_dbt(["deps"], expect_pass=True)
# `seed` need so a table exists for `external_model` to point to
run_dbt(["seed"], expect_pass=True)
# `run` needed to ensure `top_level_domains` exists in database for column getting step
run_dbt(["run"], expect_pass=True)
results = run_dbt(["test", "--select", "valid_emails"], expect_pass=True)
assert len(results) == 1


class TestUnitTestExternalProjectNode:
@pytest.fixture(scope="class")
def external_model_node(self, unique_schema):
return ModelNodeArgs(
name="external_model",
package_name="external_package",
identifier="external_node_seed",
schema=unique_schema,
)

@pytest.fixture(scope="class")
def seeds(self):
return {"external_node_seed.csv": external_package__accounts_seed_csv}

@pytest.fixture(scope="class")
def models(self):
return {
"top_level_domains.sql": top_level_domains_sql,
"valid_emails.sql": valid_emails_sql,
"unit_test_ext_node.yml": unit_test_ext_node_yml,
}

@mock.patch("dbt.plugins.get_plugin_manager")
def test_unit_test_ext_nodes(
self,
get_plugin_manager,
project,
external_model_node,
):
# initial plugin - one external model
external_nodes = PluginNodes()
external_nodes.add_model(external_model_node)
get_plugin_manager.return_value.get_nodes.return_value = external_nodes

# `seed` need so a table exists for `external_model` to point to
run_dbt(["seed"], expect_pass=True)
# `run` needed to ensure `top_level_domains` exists in database for column getting step
run_dbt(["run"], expect_pass=True)
results = run_dbt(["test", "--select", "valid_emails"], expect_pass=True)
assert len(results) == 1