From 1ded1cafd8004c51493f099e66ed635c908336d3 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Wed, 10 May 2023 16:16:38 -0400 Subject: [PATCH 1/2] CT 2510 Throw error for duplicate versioned and non versioned model names (#7577) * Check for versioned/unversioned duplicates * Add new exception DuplicateVersionedUnversionedError * Changie * Handle packages when finding versioned and unversioned duplicates (cherry picked from commit 29f2cfc48d19c0500d71ed282b04acd735b9f87e) --- .../unreleased/Fixes-20230509-165007.yaml | 6 ++++ core/dbt/exceptions.py | 20 +++++++++++ core/dbt/parser/manifest.py | 35 ++++++++++++++++--- .../partial_parsing/test_partial_parsing.py | 9 ++++- 4 files changed, 65 insertions(+), 5 deletions(-) create mode 100644 .changes/unreleased/Fixes-20230509-165007.yaml diff --git a/.changes/unreleased/Fixes-20230509-165007.yaml b/.changes/unreleased/Fixes-20230509-165007.yaml new file mode 100644 index 00000000000..0c0f06b0525 --- /dev/null +++ b/.changes/unreleased/Fixes-20230509-165007.yaml @@ -0,0 +1,6 @@ +kind: Fixes +body: Throw error for duplicated versioned and unversioned models +time: 2023-05-09T16:50:07.530882-04:00 +custom: + Author: gshank + Issue: "7487" diff --git a/core/dbt/exceptions.py b/core/dbt/exceptions.py index 269a0feb614..5225bbb712d 100644 --- a/core/dbt/exceptions.py +++ b/core/dbt/exceptions.py @@ -2145,6 +2145,26 @@ def get_message(self) -> str: return msg +class DuplicateVersionedUnversionedError(ParsingError): + def __init__(self, versioned_node, unversioned_node): + self.versioned_node = versioned_node + self.unversioned_node = unversioned_node + super().__init__(msg=self.get_message()) + + def get_message(self) -> str: + msg = f""" +dbt found versioned and unversioned models with the name "{self.versioned_node.name}". + +Since these resources have the same name, dbt will be unable to find the correct resource +when looking for ref('{self.versioned_node.name}'). + +To fix this, change the name of the unversioned resource +{self.unversioned_node.unique_id} ({self.unversioned_node.original_file_path}) +or add the unversioned model to the versions in {self.versioned_node.patch_path} + """.strip() + return msg + + class PropertyYMLError(CompilationError): def __init__(self, path: str, issue: str): self.path = path diff --git a/core/dbt/parser/manifest.py b/core/dbt/parser/manifest.py index d17d171e315..a22ca559cd8 100644 --- a/core/dbt/parser/manifest.py +++ b/core/dbt/parser/manifest.py @@ -1094,21 +1094,29 @@ def _check_resource_uniqueness( ) -> None: names_resources: Dict[str, ManifestNode] = {} alias_resources: Dict[str, ManifestNode] = {} + versioned_resources: Dict[str, ManifestNode] = {} + unversioned_resources: Dict[str, ManifestNode] = {} + duplicate_resources: Dict[str, list] = {} for resource, node in manifest.nodes.items(): if not node.is_relational: continue name = node.name + if node.is_versioned: + versioned_resources[name] = node + else: + unversioned_resources[name] = node + + existing_node = names_resources.get(name) + if existing_node is not None: + duplicate_resources[name] = [existing_node, node] + # the full node name is really defined by the adapter's relation relation_cls = get_relation_class_by_name(config.credentials.type) relation = relation_cls.create_from(config=config, node=node) full_node_name = str(relation) - existing_node = names_resources.get(name) - if existing_node is not None and not existing_node.is_versioned: - raise dbt.exceptions.DuplicateResourceNameError(existing_node, node) - existing_alias = alias_resources.get(full_node_name) if existing_alias is not None: raise AmbiguousAliasError( @@ -1118,6 +1126,25 @@ def _check_resource_uniqueness( names_resources[name] = node alias_resources[full_node_name] = node + # Check for versioned models with unversioned duplicates + intersection_versioned = set(versioned_resources.keys()).intersection( + set(unversioned_resources.keys()) + ) + for name in intersection_versioned: + versioned_node = versioned_resources[name] + unversioned_node = unversioned_resources[name] + raise dbt.exceptions.DuplicateVersionedUnversionedError( + versioned_node, unversioned_node + ) + + # Handle base case of multiple unversioned models with same name + intersection_unversioned = set(duplicate_resources.keys()) - set( + versioned_resources.keys() + ) + for name in intersection_unversioned: + existing_node, node = duplicate_resources[name] + raise dbt.exceptions.DuplicateResourceNameError(existing_node, node) + def _warn_for_unused_resource_config_paths(manifest: Manifest, config: RuntimeConfig) -> None: resource_fqns: Mapping[str, PathSet] = manifest.get_resource_fqns() diff --git a/tests/functional/partial_parsing/test_partial_parsing.py b/tests/functional/partial_parsing/test_partial_parsing.py index 76a06b69a3d..bc08eac833c 100644 --- a/tests/functional/partial_parsing/test_partial_parsing.py +++ b/tests/functional/partial_parsing/test_partial_parsing.py @@ -71,7 +71,7 @@ groups_schema_yml_two_groups_private_orders_invalid_access, ) -from dbt.exceptions import CompilationError, ParsingError +from dbt.exceptions import CompilationError, ParsingError, DuplicateVersionedUnversionedError from dbt.contracts.files import ParseFileType from dbt.contracts.results import TestStatus import re @@ -358,6 +358,13 @@ def test_pp_versioned_models(self, project): write_file(model_two_sql, project.project_root, "models", "model_one_different.sql") results = run_dbt(["--partial-parse", "run"]) assert len(results) == 3 + manifest = get_manifest(project.project_root) + assert len(manifest.nodes) == 3 + + # create a new model_one in model_one.sql and re-parse + write_file(model_one_sql, project.project_root, "models", "model_one.sql") + with pytest.raises(DuplicateVersionedUnversionedError): + run_dbt(["parse"]) class TestSources: From 5c6ea3f81bf1cc2425ef605651be5f9eb4e1804c Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Thu, 11 May 2023 12:57:22 -0400 Subject: [PATCH 2/2] Issue AmbiguousAlias error after DuplicateResourceName --- core/dbt/parser/manifest.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/core/dbt/parser/manifest.py b/core/dbt/parser/manifest.py index a22ca559cd8..7e974262919 100644 --- a/core/dbt/parser/manifest.py +++ b/core/dbt/parser/manifest.py @@ -1097,6 +1097,7 @@ def _check_resource_uniqueness( versioned_resources: Dict[str, ManifestNode] = {} unversioned_resources: Dict[str, ManifestNode] = {} duplicate_resources: Dict[str, list] = {} + duplicate_aliases: Dict[str, list] = {} for resource, node in manifest.nodes.items(): if not node.is_relational: @@ -1119,9 +1120,7 @@ def _check_resource_uniqueness( existing_alias = alias_resources.get(full_node_name) if existing_alias is not None: - raise AmbiguousAliasError( - node_1=existing_alias, node_2=node, duped_name=full_node_name - ) + duplicate_aliases[full_node_name] = [existing_alias, node] names_resources[name] = node alias_resources[full_node_name] = node @@ -1137,7 +1136,11 @@ def _check_resource_uniqueness( versioned_node, unversioned_node ) - # Handle base case of multiple unversioned models with same name + # Handle base case of multiple unversioned models with same name. + # This is for same name in multiple packages. Same name in the same + # package will be caught by _check_duplicates called by add_node. + # Version 1.6 will remove the restriction of having the same name in + # multiple packages. intersection_unversioned = set(duplicate_resources.keys()) - set( versioned_resources.keys() ) @@ -1145,6 +1148,15 @@ def _check_resource_uniqueness( existing_node, node = duplicate_resources[name] raise dbt.exceptions.DuplicateResourceNameError(existing_node, node) + # Handle ambiguous alias error. We've deferred this because we prefer to + # issue the DuplicateResourceName error + for full_node_name, node_list in duplicate_aliases.items(): + existing_alias, node = node_list + raise AmbiguousAliasError( + node_1=existing_alias, node_2=node, duped_name=full_node_name + ) + + def _warn_for_unused_resource_config_paths(manifest: Manifest, config: RuntimeConfig) -> None: resource_fqns: Mapping[str, PathSet] = manifest.get_resource_fqns()