From 5d31215a41dcc032427235bc6c3cc348a2739000 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Tue, 3 Dec 2024 15:33:21 -0500 Subject: [PATCH 1/2] fix(ingest/tableau): make sites.get_by_id call optional --- .../ingestion/source/tableau/tableau.py | 32 +++++++++++++++---- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py index 0eafdb4ad23ba0..605078e5fc844d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py @@ -68,6 +68,7 @@ CapabilityReport, MetadataWorkUnitProcessor, Source, + StructuredLogLevel, TestableSource, TestConnectionReport, ) @@ -700,6 +701,7 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: config=self.config, ctx=self.ctx, site=site, + site_id=site.id, report=self.report, server=self.server, platform=self.platform, @@ -707,11 +709,19 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: logger.info(f"Ingesting assets of site '{site.content_url}'.") yield from site_source.ingest_tableau_site() else: - site = self.server.sites.get_by_id(self.server.site_id) + site = None + with self.report.report_exc( + title="Unable to fetch site details. Site hierarchy may be incomplete and external urls may be missing.", + message="This usually indicates missing permissions. Ensure that you have all necessary permissions.", + level=StructuredLogLevel.WARN, + ): + site = self.server.sites.get_by_id(self.server.site_id) + site_source = TableauSiteSource( config=self.config, ctx=self.ctx, site=site, + site_id=self.server.site_id, report=self.report, server=self.server, platform=self.platform, @@ -743,7 +753,8 @@ def __init__( self, config: TableauConfig, ctx: PipelineContext, - site: SiteItem, + site: Optional[SiteItem], + site_id: Optional[str], report: TableauSourceReport, server: Server, platform: str, @@ -752,9 +763,16 @@ def __init__( self.report = report self.server: Server = server self.ctx: PipelineContext = ctx - self.site: SiteItem = site self.platform = platform + self.site: Optional[SiteItem] = site + if site_id is not None: + self.site_id: str = site_id + else: + assert self.site is not None, "site or site_id is required" + assert self.site.id is not None, "site_id is required when site is provided" + self.site_id = self.site.id + self.database_tables: Dict[str, DatabaseTable] = {} self.tableau_stat_registry: Dict[str, UsageStat] = {} self.tableau_project_registry: Dict[str, TableauProject] = {} @@ -3181,10 +3199,10 @@ def emit_project_in_topological_order( else: # This is a root Tableau project since the parent_project_id is None. # For a root project, either the site is the parent, or the platform is the default parent. - if self.config.add_site_container and self.site and self.site.id: + if self.config.add_site_container: # The site containers have already been generated by emit_site_container, so we # don't need to emit them again here. - parent_project_key = self.gen_site_key(self.site.id) + parent_project_key = self.gen_site_key(self.site_id) yield from gen_containers( container_key=project_key, @@ -3201,12 +3219,12 @@ def emit_project_in_topological_order( yield from emit_project_in_topological_order(project) def emit_site_container(self): - if not self.site or not self.site.id: + if not self.site: logger.warning("Can not ingest site container. No site information found.") return yield from gen_containers( - container_key=self.gen_site_key(self.site.id), + container_key=self.gen_site_key(self.site_id), name=self.site.name or "Default", sub_types=[c.SITE], ) From 19e8f491e07f2afb5fca6065a682399b2fc3a39e Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Tue, 3 Dec 2024 16:05:17 -0500 Subject: [PATCH 2/2] fix tests --- .../src/datahub/ingestion/source/tableau/tableau.py | 2 +- .../tests/integration/tableau/test_tableau_ingest.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py index 605078e5fc844d..2fae4ca2e98257 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py @@ -826,7 +826,7 @@ def dataset_browse_prefix(self) -> str: def _re_authenticate(self): tableau_auth: Union[ TableauAuth, PersonalAccessTokenAuth - ] = self.config.get_tableau_auth(self.site.content_url) + ] = self.config.get_tableau_auth(self.site_id) self.server.auth.sign_in(tableau_auth) @property diff --git a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py index 6c45b8a47de412..38a53b323876d1 100644 --- a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py +++ b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py @@ -1028,6 +1028,7 @@ def check_lineage_metadata( ctx=context, platform="tableau", site=SiteItem(name="Site 1", content_url="site1"), + site_id="site1", report=TableauSourceReport(), server=Server("https://test-tableau-server.com"), ) @@ -1248,6 +1249,7 @@ def test_permission_mode_switched_error(pytestconfig, tmp_path, mock_datahub_gra config=mock.MagicMock(), ctx=mock.MagicMock(), site=mock.MagicMock(), + site_id=None, server=mock_sdk.return_value, report=reporter, )