From ecacb0562f254aca742d403b2cc60a1f386c5829 Mon Sep 17 00:00:00 2001 From: Siddique Bagwan Date: Tue, 25 Jun 2024 15:26:18 +0530 Subject: [PATCH 1/8] platform instance for looker dashboards and charts --- .../ingestion/source/looker/looker_config.py | 7 +- .../ingestion/source/looker/looker_source.py | 35 ++++- .../looker/golden_test_ingest.json | 120 ++++++++++++------ .../tests/integration/looker/test_looker.py | 1 + 4 files changed, 115 insertions(+), 48 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py index 8de213cfabaf0b..f9d5f335c1e431 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py @@ -209,12 +209,17 @@ class LookerDashboardSourceConfig( ) extract_independent_looks: bool = Field( False, - description="Extract looks which are not part of any Dashboard. To enable this flag the stateful_ingestion should also be enabled.", + description="Extract looks which are not part of any Dashboard. To enable this flag the stateful_ingestion " + "should also be enabled.", ) emit_used_explores_only: bool = Field( True, description="When enabled, only explores that are used by a Dashboard/Look will be ingested.", ) + include_looker_element_in_platform_instance: bool = Field( + True, + description="When enabled, platform instance will be added in dashboard and chart urn.", + ) @validator("external_base_url", pre=True, always=True) def external_url_defaults_to_api_config_base_url( diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py index c87ee1d77f5cd9..d4ab073e3561b5 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py @@ -620,6 +620,19 @@ def _get_folder_browse_path_v2_entries( if include_current_folder: yield BrowsePathEntryClass(id=urn, urn=urn) + def _make_chart_urn(self, element_id: str) -> str: + if not self.source_config.include_looker_element_in_platform_instance: + return builder.make_chart_urn( + name=element_id, + platform=self.source_config.platform_name, + ) + + return builder.make_chart_urn( + name=element_id, + platform=self.platform, + platform_instance=self.source_config.platform_instance, + ) + def _make_chart_metadata_events( self, dashboard_element: LookerDashboardElement, @@ -627,8 +640,8 @@ def _make_chart_metadata_events( LookerDashboard ], # dashboard will be None if this is a standalone look ) -> List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]]: - chart_urn = builder.make_chart_urn( - self.source_config.platform_name, dashboard_element.get_urn_element_id() + chart_urn = self._make_chart_urn( + element_id=dashboard_element.get_urn_element_id() ) chart_snapshot = ChartSnapshot( urn=chart_urn, @@ -816,8 +829,16 @@ def _make_dashboard_metadata_events( return proposals def make_dashboard_urn(self, looker_dashboard): + if not self.source_config.include_looker_element_in_platform_instance: + return builder.make_dashboard_urn( + name=looker_dashboard.get_urn_dashboard_id(), + platform=self.source_config.platform_name, + ) + return builder.make_dashboard_urn( - self.source_config.platform_name, looker_dashboard.get_urn_dashboard_id() + name=looker_dashboard.get_urn_dashboard_id(), + platform=self.source_config.platform_name, + platform_instance=self.source_config.platform_instance, ) def _make_explore_metadata_events( @@ -1149,8 +1170,8 @@ def _input_fields_from_dashboard_element( # enrich the input_fields with the fully hydrated ViewField from the now fetched explores for input_field in input_fields: - entity_urn = builder.make_chart_urn( - self.source_config.platform_name, dashboard_element.get_urn_element_id() + entity_urn = self._make_chart_urn( + element_id=dashboard_element.get_urn_element_id() ) view_field_for_reference = input_field.view_field @@ -1217,8 +1238,8 @@ def _make_metrics_dimensions_dashboard_mcp( def _make_metrics_dimensions_chart_mcp( self, dashboard_element: LookerDashboardElement ) -> MetadataChangeProposalWrapper: - chart_urn = builder.make_chart_urn( - self.source_config.platform_name, dashboard_element.get_urn_element_id() + chart_urn = self._make_chart_urn( + element_id=dashboard_element.get_urn_element_id() ) input_fields_aspect = InputFieldsClass( fields=self._input_fields_from_dashboard_element(dashboard_element) diff --git a/metadata-ingestion/tests/integration/looker/golden_test_ingest.json b/metadata-ingestion/tests/integration/looker/golden_test_ingest.json index ab1c6719609827..f743b34812ea88 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_ingest.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_ingest.json @@ -1,13 +1,14 @@ [ { "entityType": "container", - "entityUrn": "urn:li:container:691314a7b63628684d62a14861d057a8", + "entityUrn": "urn:li:container:e7fe6fc9c3ca70e78694dcc5dd9c05b7", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { "json": { "customProperties": { "platform": "looker", + "instance": "ap-south-1", "env": "PROD", "folder_id": "shared-folder-id" }, @@ -22,7 +23,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:691314a7b63628684d62a14861d057a8", + "entityUrn": "urn:li:container:e7fe6fc9c3ca70e78694dcc5dd9c05b7", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -38,12 +39,13 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:691314a7b63628684d62a14861d057a8", + "entityUrn": "urn:li:container:e7fe6fc9c3ca70e78694dcc5dd9c05b7", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { "json": { - "platform": "urn:li:dataPlatform:looker" + "platform": "urn:li:dataPlatform:looker", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:looker,ap-south-1)" } }, "systemMetadata": { @@ -54,7 +56,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:691314a7b63628684d62a14861d057a8", + "entityUrn": "urn:li:container:e7fe6fc9c3ca70e78694dcc5dd9c05b7", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -72,12 +74,16 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:691314a7b63628684d62a14861d057a8", + "entityUrn": "urn:li:container:e7fe6fc9c3ca70e78694dcc5dd9c05b7", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:looker,ap-south-1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:looker,ap-south-1)" + }, { "id": "Folders" } @@ -93,7 +99,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.ChartSnapshot": { - "urn": "urn:li:chart:(looker,dashboard_elements.2)", + "urn": "urn:li:chart:(looker,ap-south-1.dashboard_elements.2)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -120,7 +126,7 @@ "chartUrl": "https://looker.company.com/x/", "inputs": [ { - "string": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)" + "string": "urn:li:dataset:(urn:li:dataPlatform:looker,ap-south-1.data.explore.my_view,PROD)" } ] } @@ -143,7 +149,7 @@ }, { "entityType": "chart", - "entityUrn": "urn:li:chart:(looker,dashboard_elements.2)", + "entityUrn": "urn:li:chart:(looker,ap-south-1.dashboard_elements.2)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -161,18 +167,22 @@ }, { "entityType": "chart", - "entityUrn": "urn:li:chart:(looker,dashboard_elements.2)", + "entityUrn": "urn:li:chart:(looker,ap-south-1.dashboard_elements.2)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:looker,ap-south-1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:looker,ap-south-1)" + }, { "id": "Folders" }, { - "id": "urn:li:container:691314a7b63628684d62a14861d057a8", - "urn": "urn:li:container:691314a7b63628684d62a14861d057a8" + "id": "urn:li:container:e7fe6fc9c3ca70e78694dcc5dd9c05b7", + "urn": "urn:li:container:e7fe6fc9c3ca70e78694dcc5dd9c05b7" }, { "id": "foo" @@ -189,7 +199,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DashboardSnapshot": { - "urn": "urn:li:dashboard:(looker,dashboards.1)", + "urn": "urn:li:dashboard:(looker,ap-south-1.dashboards.1)", "aspects": [ { "com.linkedin.pegasus2avro.dashboard.DashboardInfo": { @@ -197,7 +207,7 @@ "title": "foo", "description": "lorem ipsum", "charts": [ - "urn:li:chart:(looker,dashboard_elements.2)" + "urn:li:chart:(looker,ap-south-1.dashboard_elements.2)" ], "datasets": [], "lastModified": { @@ -236,12 +246,12 @@ }, { "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(looker,dashboards.1)", + "entityUrn": "urn:li:dashboard:(looker,ap-south-1.dashboards.1)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:691314a7b63628684d62a14861d057a8" + "container": "urn:li:container:e7fe6fc9c3ca70e78694dcc5dd9c05b7" } }, "systemMetadata": { @@ -252,7 +262,7 @@ }, { "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(looker,dashboards.1)", + "entityUrn": "urn:li:dashboard:(looker,ap-south-1.dashboards.1)", "changeType": "UPSERT", "aspectName": "embed", "aspect": { @@ -268,18 +278,22 @@ }, { "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(looker,dashboards.1)", + "entityUrn": "urn:li:dashboard:(looker,ap-south-1.dashboards.1)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:looker,ap-south-1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:looker,ap-south-1)" + }, { "id": "Folders" }, { - "id": "urn:li:container:691314a7b63628684d62a14861d057a8", - "urn": "urn:li:container:691314a7b63628684d62a14861d057a8" + "id": "urn:li:container:e7fe6fc9c3ca70e78694dcc5dd9c05b7", + "urn": "urn:li:container:e7fe6fc9c3ca70e78694dcc5dd9c05b7" } ] } @@ -292,14 +306,14 @@ }, { "entityType": "chart", - "entityUrn": "urn:li:chart:(looker,dashboard_elements.2)", + "entityUrn": "urn:li:chart:(looker,ap-south-1.dashboard_elements.2)", "changeType": "UPSERT", "aspectName": "inputFields", "aspect": { "json": { "fields": [ { - "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(looker,dashboard_elements.2),calc)", + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(looker,ap-south-1.dashboard_elements.2),calc)", "schemaField": { "fieldPath": "calc", "nullable": false, @@ -319,7 +333,7 @@ } }, { - "schemaFieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD),dim1)", + "schemaFieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,ap-south-1.data.explore.my_view,PROD),dim1)", "schemaField": { "fieldPath": "dim1", "nullable": false, @@ -360,7 +374,7 @@ "json": { "fields": [ { - "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(looker,dashboard_elements.2),calc)", + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(looker,ap-south-1.dashboard_elements.2),calc)", "schemaField": { "fieldPath": "calc", "nullable": false, @@ -380,7 +394,7 @@ } }, { - "schemaFieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD),dim1)", + "schemaFieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,ap-south-1.data.explore.my_view,PROD),dim1)", "schemaField": { "fieldPath": "dim1", "nullable": false, @@ -414,13 +428,14 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:59a5aa45397364e6882e793f1bc77b42", + "entityUrn": "urn:li:container:63e49aaeb15b289d177acbb32625d577", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { "json": { "customProperties": { "platform": "looker", + "instance": "ap-south-1", "env": "PROD", "model_name": "data" }, @@ -435,7 +450,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:59a5aa45397364e6882e793f1bc77b42", + "entityUrn": "urn:li:container:63e49aaeb15b289d177acbb32625d577", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -451,12 +466,13 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:59a5aa45397364e6882e793f1bc77b42", + "entityUrn": "urn:li:container:63e49aaeb15b289d177acbb32625d577", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { "json": { - "platform": "urn:li:dataPlatform:looker" + "platform": "urn:li:dataPlatform:looker", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:looker,ap-south-1)" } }, "systemMetadata": { @@ -467,7 +483,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:59a5aa45397364e6882e793f1bc77b42", + "entityUrn": "urn:li:container:63e49aaeb15b289d177acbb32625d577", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -485,12 +501,16 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:59a5aa45397364e6882e793f1bc77b42", + "entityUrn": "urn:li:container:63e49aaeb15b289d177acbb32625d577", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:looker,ap-south-1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:looker,ap-south-1)" + }, { "id": "Explore" } @@ -506,7 +526,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,ap-south-1.data.explore.my_view,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.BrowsePaths": { @@ -540,7 +560,7 @@ "time": 1586847600000, "actor": "urn:li:corpuser:datahub" }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.underlying_view,PROD)", + "dataset": "urn:li:dataset:(urn:li:dataPlatform:looker,ap-south-1.lkml_samples.view.underlying_view,PROD)", "type": "VIEW" } ] @@ -602,7 +622,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,ap-south-1.data.explore.my_view,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -620,7 +640,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,ap-south-1.data.explore.my_view,PROD)", "changeType": "UPSERT", "aspectName": "embed", "aspect": { @@ -636,12 +656,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,ap-south-1.data.explore.my_view,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:59a5aa45397364e6882e793f1bc77b42" + "container": "urn:li:container:63e49aaeb15b289d177acbb32625d577" } }, "systemMetadata": { @@ -652,18 +672,22 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,ap-south-1.data.explore.my_view,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:looker,ap-south-1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:looker,ap-south-1)" + }, { "id": "Explore" }, { - "id": "urn:li:container:59a5aa45397364e6882e793f1bc77b42", - "urn": "urn:li:container:59a5aa45397364e6882e793f1bc77b42" + "id": "urn:li:container:63e49aaeb15b289d177acbb32625d577", + "urn": "urn:li:container:63e49aaeb15b289d177acbb32625d577" } ] } @@ -734,6 +758,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(looker,dashboards.1)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "tag", "entityUrn": "urn:li:tag:Dimension", diff --git a/metadata-ingestion/tests/integration/looker/test_looker.py b/metadata-ingestion/tests/integration/looker/test_looker.py index 3e049f8b2ef4e0..cfde2b8feb7cdd 100644 --- a/metadata-ingestion/tests/integration/looker/test_looker.py +++ b/metadata-ingestion/tests/integration/looker/test_looker.py @@ -94,6 +94,7 @@ def test_looker_ingest(pytestconfig, tmp_path, mock_time): "client_id": "foo", "client_secret": "bar", "extract_usage_history": False, + "platform_instance": "ap-south-1", }, }, "sink": { From f9ba34d03e2a0ce839892839b8a37c72ac873f5e Mon Sep 17 00:00:00 2001 From: Siddique Bagwan Date: Thu, 27 Jun 2024 08:47:09 +0530 Subject: [PATCH 2/8] include_looker_element_in_platform_instance is default to false --- .../src/datahub/ingestion/source/looker/looker_config.py | 2 +- metadata-ingestion/tests/integration/looker/test_looker.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py index f9d5f335c1e431..7d42c566e39561 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py @@ -217,7 +217,7 @@ class LookerDashboardSourceConfig( description="When enabled, only explores that are used by a Dashboard/Look will be ingested.", ) include_looker_element_in_platform_instance: bool = Field( - True, + False, description="When enabled, platform instance will be added in dashboard and chart urn.", ) diff --git a/metadata-ingestion/tests/integration/looker/test_looker.py b/metadata-ingestion/tests/integration/looker/test_looker.py index cfde2b8feb7cdd..31703043af3b53 100644 --- a/metadata-ingestion/tests/integration/looker/test_looker.py +++ b/metadata-ingestion/tests/integration/looker/test_looker.py @@ -95,6 +95,7 @@ def test_looker_ingest(pytestconfig, tmp_path, mock_time): "client_secret": "bar", "extract_usage_history": False, "platform_instance": "ap-south-1", + "include_looker_element_in_platform_instance": True, }, }, "sink": { From d22961289d59073591a90d5b002fc3f4ae54859d Mon Sep 17 00:00:00 2001 From: Siddique Bagwan Date: Mon, 1 Jul 2024 13:03:03 +0530 Subject: [PATCH 3/8] address review comments --- .../ingestion/source/looker/looker_source.py | 36 +++++++++---------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py index a178ff266b0578..7d8f1605f8c772 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py @@ -621,17 +621,15 @@ def _get_folder_browse_path_v2_entries( yield BrowsePathEntryClass(id=urn, urn=urn) def _make_chart_urn(self, element_id: str) -> str: - if not self.source_config.include_looker_element_in_platform_instance: - return builder.make_chart_urn( - name=element_id, - platform=self.source_config.platform_name, - ) + urn_params: dict = { + "name": element_id, + "platform": self.source_config.platform_name, + } - return builder.make_chart_urn( - name=element_id, - platform=self.platform, - platform_instance=self.source_config.platform_instance, - ) + if self.source_config.include_looker_element_in_platform_instance: + urn_params["platform_instance"] = self.source_config.platform_instance + + return builder.make_chart_urn(**urn_params) def _make_chart_metadata_events( self, @@ -829,17 +827,15 @@ def _make_dashboard_metadata_events( return proposals def make_dashboard_urn(self, looker_dashboard): - if not self.source_config.include_looker_element_in_platform_instance: - return builder.make_dashboard_urn( - name=looker_dashboard.get_urn_dashboard_id(), - platform=self.source_config.platform_name, - ) + urn_params: dict = { + "name": looker_dashboard.get_urn_dashboard_id(), + "platform": self.source_config.platform_name, + } - return builder.make_dashboard_urn( - name=looker_dashboard.get_urn_dashboard_id(), - platform=self.source_config.platform_name, - platform_instance=self.source_config.platform_instance, - ) + if self.source_config.include_looker_element_in_platform_instance: + urn_params["platform_instance"] = self.source_config.platform_instance + + return builder.make_dashboard_urn(**urn_params) def _make_explore_metadata_events( self, From 9240607b31026346f31f042e66f2a04c321df8a7 Mon Sep 17 00:00:00 2001 From: Siddique Bagwan Date: Tue, 2 Jul 2024 14:58:52 +0530 Subject: [PATCH 4/8] added platform instance aspect --- .../ingestion/source/looker/looker_source.py | 21 +++++++++++- .../looker/golden_test_ingest.json | 34 +++++++++++++++++++ 2 files changed, 54 insertions(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py index 7d8f1605f8c772..cf3249a3ea6b39 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py @@ -80,7 +80,7 @@ from datahub.metadata.com.linkedin.pegasus2avro.common import ( AuditStamp, ChangeAuditStamps, - Status, + Status, DataPlatformInstance, ) from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import ( ChartSnapshot, @@ -620,6 +620,21 @@ def _get_folder_browse_path_v2_entries( if include_current_folder: yield BrowsePathEntryClass(id=urn, urn=urn) + def _add_platform_instance_aspect(self, urn: str, proposals: List[MetadataChangeProposalWrapper]) -> None: + if self.source_config.include_looker_element_in_platform_instance: + proposals.append( + MetadataChangeProposalWrapper( + entityUrn=urn, + aspect=DataPlatformInstance( + platform=builder.make_data_platform_urn(self.source_config.platform_name), + instance=builder.make_dataplatform_instance_urn( + platform=self.source_config.platform_name, + instance=self.source_config.platform_instance, + ), + ), + ), + ) + def _make_chart_urn(self, element_id: str) -> str: urn_params: dict = { "name": element_id, @@ -719,6 +734,8 @@ def _make_chart_metadata_events( ), ] + self._add_platform_instance_aspect(urn=chart_urn, proposals=proposals) + # If extracting embeds is enabled, produce an MCP for embed URL. if ( self.source_config.extract_embed_urls @@ -824,6 +841,8 @@ def _make_dashboard_metadata_events( ) ) + self._add_platform_instance_aspect(urn=dashboard_urn, proposals=proposals) + return proposals def make_dashboard_urn(self, looker_dashboard): diff --git a/metadata-ingestion/tests/integration/looker/golden_test_ingest.json b/metadata-ingestion/tests/integration/looker/golden_test_ingest.json index 9e3ea443f273a7..ddf96241b4e7a8 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_ingest.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_ingest.json @@ -165,6 +165,23 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(looker,ap-south-1.dashboard_elements.2)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:looker,ap-south-1)" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "chart", "entityUrn": "urn:li:chart:(looker,ap-south-1.dashboard_elements.2)", @@ -276,6 +293,23 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(looker,ap-south-1.dashboards.1)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:looker,ap-south-1)" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dashboard", "entityUrn": "urn:li:dashboard:(looker,ap-south-1.dashboards.1)", From 71a146caec7f2a574f6623606a602a6967aa41cf Mon Sep 17 00:00:00 2001 From: Siddique Bagwan Date: Tue, 2 Jul 2024 15:08:10 +0530 Subject: [PATCH 5/8] lint fix --- .../ingestion/source/looker/looker_source.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py index cf3249a3ea6b39..8a0eb80305e5a9 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py @@ -80,7 +80,8 @@ from datahub.metadata.com.linkedin.pegasus2avro.common import ( AuditStamp, ChangeAuditStamps, - Status, DataPlatformInstance, + DataPlatformInstance, + Status, ) from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import ( ChartSnapshot, @@ -620,13 +621,23 @@ def _get_folder_browse_path_v2_entries( if include_current_folder: yield BrowsePathEntryClass(id=urn, urn=urn) - def _add_platform_instance_aspect(self, urn: str, proposals: List[MetadataChangeProposalWrapper]) -> None: + def _add_platform_instance_aspect( + self, + urn: str, + proposals: List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]], + ) -> None: if self.source_config.include_looker_element_in_platform_instance: + + assert self.source_config.platform_name + assert self.source_config.platform_instance + proposals.append( MetadataChangeProposalWrapper( entityUrn=urn, aspect=DataPlatformInstance( - platform=builder.make_data_platform_urn(self.source_config.platform_name), + platform=builder.make_data_platform_urn( + self.source_config.platform_name + ), instance=builder.make_dataplatform_instance_urn( platform=self.source_config.platform_name, instance=self.source_config.platform_instance, From 02b3af07e14f9ae9b8b0d34e6586c3be2b151072 Mon Sep 17 00:00:00 2001 From: Siddique Bagwan Date: Mon, 15 Jul 2024 15:00:35 +0530 Subject: [PATCH 6/8] address review comments --- .../ingestion/source/looker/looker_config.py | 2 +- .../ingestion/source/looker/looker_source.py | 66 ++++++++++--------- .../tests/integration/looker/test_looker.py | 2 +- 3 files changed, 37 insertions(+), 33 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py index c0e85847b338b8..bcf6d380a60fd0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py @@ -293,7 +293,7 @@ class LookerDashboardSourceConfig( True, description="When enabled, only explores that are used by a Dashboard/Look will be ingested.", ) - include_looker_element_in_platform_instance: bool = Field( + include_platform_instance_in_urns: bool = Field( False, description="When enabled, platform instance will be added in dashboard and chart urn.", ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py index 13ce8993d7b038..abaaa47363f7b5 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py @@ -625,41 +625,33 @@ def _get_folder_browse_path_v2_entries( if include_current_folder: yield BrowsePathEntryClass(id=urn, urn=urn) - def _add_platform_instance_aspect( + def _create_platform_instance_aspect( self, - urn: str, - proposals: List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]], - ) -> None: - if self.source_config.include_looker_element_in_platform_instance: + ) -> DataPlatformInstance: - assert self.source_config.platform_name - assert self.source_config.platform_instance + assert self.source_config.platform_name + assert self.source_config.platform_instance - proposals.append( - MetadataChangeProposalWrapper( - entityUrn=urn, - aspect=DataPlatformInstance( - platform=builder.make_data_platform_urn( - self.source_config.platform_name - ), - instance=builder.make_dataplatform_instance_urn( - platform=self.source_config.platform_name, - instance=self.source_config.platform_instance, - ), - ), - ), - ) + return DataPlatformInstance( + platform=builder.make_data_platform_urn(self.source_config.platform_name), + instance=builder.make_dataplatform_instance_urn( + platform=self.source_config.platform_name, + instance=self.source_config.platform_instance, + ), + ) def _make_chart_urn(self, element_id: str) -> str: - urn_params: dict = { - "name": element_id, - "platform": self.source_config.platform_name, - } - if self.source_config.include_looker_element_in_platform_instance: - urn_params["platform_instance"] = self.source_config.platform_instance + platform_instance: Optional[str] = None + + if self.source_config.include_platform_instance_in_urns: + platform_instance = self.source_config.platform_instance - return builder.make_chart_urn(**urn_params) + return builder.make_chart_urn( + name=element_id, + platform=self.source_config.platform_name, + platform_instance=platform_instance, + ) def _make_chart_metadata_events( self, @@ -749,7 +741,13 @@ def _make_chart_metadata_events( ), ] - self._add_platform_instance_aspect(urn=chart_urn, proposals=proposals) + if self.source_config.include_platform_instance_in_urns: + proposals.append( + MetadataChangeProposalWrapper( + entityUrn=chart_urn, + aspect=self._create_platform_instance_aspect(), + ), + ) # If extracting embeds is enabled, produce an MCP for embed URL. if ( @@ -856,7 +854,13 @@ def _make_dashboard_metadata_events( ) ) - self._add_platform_instance_aspect(urn=dashboard_urn, proposals=proposals) + if self.source_config.include_platform_instance_in_urns: + proposals.append( + MetadataChangeProposalWrapper( + entityUrn=dashboard_urn, + aspect=self._create_platform_instance_aspect(), + ) + ) return proposals @@ -866,7 +870,7 @@ def make_dashboard_urn(self, looker_dashboard): "platform": self.source_config.platform_name, } - if self.source_config.include_looker_element_in_platform_instance: + if self.source_config.include_platform_instance_in_urns: urn_params["platform_instance"] = self.source_config.platform_instance return builder.make_dashboard_urn(**urn_params) diff --git a/metadata-ingestion/tests/integration/looker/test_looker.py b/metadata-ingestion/tests/integration/looker/test_looker.py index c9f43acedcd1aa..fdc9c45fcf5396 100644 --- a/metadata-ingestion/tests/integration/looker/test_looker.py +++ b/metadata-ingestion/tests/integration/looker/test_looker.py @@ -95,7 +95,7 @@ def test_looker_ingest(pytestconfig, tmp_path, mock_time): "client_secret": "bar", "extract_usage_history": False, "platform_instance": "ap-south-1", - "include_looker_element_in_platform_instance": True, + "include_platform_instance_in_urns": True, }, }, "sink": { From c950d3dbe85d0a73374f4593e5fb7f6d4e1770a9 Mon Sep 17 00:00:00 2001 From: Siddique Bagwan Date: Tue, 16 Jul 2024 11:48:44 +0530 Subject: [PATCH 7/8] update make_dashboard_urn --- .../ingestion/source/looker/looker_source.py | 21 ++++++++++++------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py index abaaa47363f7b5..c5acb80b98169d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py @@ -629,8 +629,12 @@ def _create_platform_instance_aspect( self, ) -> DataPlatformInstance: - assert self.source_config.platform_name - assert self.source_config.platform_instance + assert ( + self.source_config.platform_name + ), "Platform name is not set in the configuration." + assert ( + self.source_config.platform_instance + ), "Platform instance is not set in the configuration." return DataPlatformInstance( platform=builder.make_data_platform_urn(self.source_config.platform_name), @@ -865,15 +869,16 @@ def _make_dashboard_metadata_events( return proposals def make_dashboard_urn(self, looker_dashboard): - urn_params: dict = { - "name": looker_dashboard.get_urn_dashboard_id(), - "platform": self.source_config.platform_name, - } + platform_instance: Optional[str] = None if self.source_config.include_platform_instance_in_urns: - urn_params["platform_instance"] = self.source_config.platform_instance + platform_instance = self.source_config.platform_instance - return builder.make_dashboard_urn(**urn_params) + return builder.make_dashboard_urn( + name=looker_dashboard.get_urn_dashboard_id(), + platform=self.source_config.platform_name, + platform_instance=platform_instance, + ) def _make_explore_metadata_events( self, From 9b97cb7a0b89e4567808cde3faf000c445164651 Mon Sep 17 00:00:00 2001 From: Siddique Bagwan Date: Fri, 19 Jul 2024 12:07:10 +0530 Subject: [PATCH 8/8] emit dataplatformInstance entity --- .../ingestion/source/looker/looker_source.py | 21 ++++++++++++ .../looker/golden_test_ingest.json | 34 +++++++++++++++++++ 2 files changed, 55 insertions(+) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py index c5acb80b98169d..358da7f40817fc 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py @@ -96,11 +96,13 @@ ChartTypeClass, ContainerClass, DashboardInfoClass, + DataPlatformInfoClass, InputFieldClass, InputFieldsClass, OwnerClass, OwnershipClass, OwnershipTypeClass, + PlatformTypeClass, SubTypesClass, ) from datahub.utilities.backpressure_aware_executor import BackpressureAwareExecutor @@ -1570,6 +1572,25 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: looker_dashboards_for_usage: List[looker_usage.LookerDashboardForUsage] = [] + # Emit platform instance entity + if self.source_config.platform_instance: + platform_instance_urn = builder.make_dataplatform_instance_urn( + platform=self.source_config.platform_name, + instance=self.source_config.platform_instance, + ) + + yield MetadataWorkUnit( + id=f"{platform_instance_urn}-aspect-dataplatformInfo", + mcp=MetadataChangeProposalWrapper( + entityUrn=platform_instance_urn, + aspect=DataPlatformInfoClass( + name=self.source_config.platform_instance, + type=PlatformTypeClass.OTHERS, + datasetNameDelimiter=".", + ), + ), + ) + with self.reporter.report_stage("dashboard_chart_metadata"): for job in BackpressureAwareExecutor.map( self.process_dashboard, diff --git a/metadata-ingestion/tests/integration/looker/golden_test_ingest.json b/metadata-ingestion/tests/integration/looker/golden_test_ingest.json index ddf96241b4e7a8..e10fc7eeb5a9f2 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_ingest.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_ingest.json @@ -1,4 +1,22 @@ [ +{ + "entityType": "dataPlatformInstance", + "entityUrn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:looker,ap-south-1)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInfo", + "aspect": { + "json": { + "name": "ap-south-1", + "type": "OTHERS", + "datasetNameDelimiter": "." + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "container", "entityUrn": "urn:li:container:e7fe6fc9c3ca70e78694dcc5dd9c05b7", @@ -802,6 +820,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataPlatformInstance", + "entityUrn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:looker,ap-south-1)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "tag", "entityUrn": "urn:li:tag:Dimension",