diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py index 4ad9635069b156..bcf6d380a60fd0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py @@ -286,12 +286,17 @@ class LookerDashboardSourceConfig( ) extract_independent_looks: bool = Field( False, - description="Extract looks which are not part of any Dashboard. To enable this flag the stateful_ingestion should also be enabled.", + description="Extract looks which are not part of any Dashboard. To enable this flag the stateful_ingestion " + "should also be enabled.", ) emit_used_explores_only: bool = Field( True, description="When enabled, only explores that are used by a Dashboard/Look will be ingested.", ) + include_platform_instance_in_urns: bool = Field( + False, + description="When enabled, platform instance will be added in dashboard and chart urn.", + ) @validator("external_base_url", pre=True, always=True) def external_url_defaults_to_api_config_base_url( diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py index cd050fec35c2c7..53f2e1fefce7b3 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py @@ -80,6 +80,7 @@ from datahub.metadata.com.linkedin.pegasus2avro.common import ( AuditStamp, ChangeAuditStamps, + DataPlatformInstance, Status, ) from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import ( @@ -95,11 +96,13 @@ ChartTypeClass, ContainerClass, DashboardInfoClass, + DataPlatformInfoClass, InputFieldClass, InputFieldsClass, OwnerClass, OwnershipClass, OwnershipTypeClass, + PlatformTypeClass, SubTypesClass, ) from datahub.utilities.backpressure_aware_executor import BackpressureAwareExecutor @@ -624,6 +627,38 @@ def _get_folder_browse_path_v2_entries( if include_current_folder: yield BrowsePathEntryClass(id=urn, urn=urn) + def _create_platform_instance_aspect( + self, + ) -> DataPlatformInstance: + + assert ( + self.source_config.platform_name + ), "Platform name is not set in the configuration." + assert ( + self.source_config.platform_instance + ), "Platform instance is not set in the configuration." + + return DataPlatformInstance( + platform=builder.make_data_platform_urn(self.source_config.platform_name), + instance=builder.make_dataplatform_instance_urn( + platform=self.source_config.platform_name, + instance=self.source_config.platform_instance, + ), + ) + + def _make_chart_urn(self, element_id: str) -> str: + + platform_instance: Optional[str] = None + + if self.source_config.include_platform_instance_in_urns: + platform_instance = self.source_config.platform_instance + + return builder.make_chart_urn( + name=element_id, + platform=self.source_config.platform_name, + platform_instance=platform_instance, + ) + def _make_chart_metadata_events( self, dashboard_element: LookerDashboardElement, @@ -631,8 +666,8 @@ def _make_chart_metadata_events( LookerDashboard ], # dashboard will be None if this is a standalone look ) -> List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]]: - chart_urn = builder.make_chart_urn( - self.source_config.platform_name, dashboard_element.get_urn_element_id() + chart_urn = self._make_chart_urn( + element_id=dashboard_element.get_urn_element_id() ) chart_snapshot = ChartSnapshot( urn=chart_urn, @@ -713,6 +748,14 @@ def _make_chart_metadata_events( ), ] + if self.source_config.include_platform_instance_in_urns: + proposals.append( + MetadataChangeProposalWrapper( + entityUrn=chart_urn, + aspect=self._create_platform_instance_aspect(), + ), + ) + # If extracting embeds is enabled, produce an MCP for embed URL. if ( self.source_config.extract_embed_urls @@ -818,11 +861,26 @@ def _make_dashboard_metadata_events( ) ) + if self.source_config.include_platform_instance_in_urns: + proposals.append( + MetadataChangeProposalWrapper( + entityUrn=dashboard_urn, + aspect=self._create_platform_instance_aspect(), + ) + ) + return proposals def make_dashboard_urn(self, looker_dashboard: LookerDashboard) -> str: + platform_instance: Optional[str] = None + + if self.source_config.include_platform_instance_in_urns: + platform_instance = self.source_config.platform_instance + return builder.make_dashboard_urn( - self.source_config.platform_name, looker_dashboard.get_urn_dashboard_id() + name=looker_dashboard.get_urn_dashboard_id(), + platform=self.source_config.platform_name, + platform_instance=platform_instance, ) def _make_explore_metadata_events( @@ -1154,8 +1212,8 @@ def _input_fields_from_dashboard_element( # enrich the input_fields with the fully hydrated ViewField from the now fetched explores for input_field in input_fields: - entity_urn = builder.make_chart_urn( - self.source_config.platform_name, dashboard_element.get_urn_element_id() + entity_urn = self._make_chart_urn( + element_id=dashboard_element.get_urn_element_id() ) view_field_for_reference = input_field.view_field @@ -1220,8 +1278,8 @@ def _make_metrics_dimensions_dashboard_mcp( def _make_metrics_dimensions_chart_mcp( self, dashboard_element: LookerDashboardElement ) -> MetadataChangeProposalWrapper: - chart_urn = builder.make_chart_urn( - self.source_config.platform_name, dashboard_element.get_urn_element_id() + chart_urn = self._make_chart_urn( + element_id=dashboard_element.get_urn_element_id() ) input_fields_aspect = InputFieldsClass( fields=self._input_fields_from_dashboard_element(dashboard_element) @@ -1513,6 +1571,25 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: looker_dashboards_for_usage: List[looker_usage.LookerDashboardForUsage] = [] + # Emit platform instance entity + if self.source_config.platform_instance: + platform_instance_urn = builder.make_dataplatform_instance_urn( + platform=self.source_config.platform_name, + instance=self.source_config.platform_instance, + ) + + yield MetadataWorkUnit( + id=f"{platform_instance_urn}-aspect-dataplatformInfo", + mcp=MetadataChangeProposalWrapper( + entityUrn=platform_instance_urn, + aspect=DataPlatformInfoClass( + name=self.source_config.platform_instance, + type=PlatformTypeClass.OTHERS, + datasetNameDelimiter=".", + ), + ), + ) + with self.reporter.report_stage("dashboard_chart_metadata"): for job in BackpressureAwareExecutor.map( self.process_dashboard, diff --git a/metadata-ingestion/tests/integration/looker/golden_test_ingest.json b/metadata-ingestion/tests/integration/looker/golden_test_ingest.json index 639e69a6f82059..76c8f04e8447a7 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_ingest.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_ingest.json @@ -1,13 +1,32 @@ [ +{ + "entityType": "dataPlatformInstance", + "entityUrn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:looker,ap-south-1)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInfo", + "aspect": { + "json": { + "name": "ap-south-1", + "type": "OTHERS", + "datasetNameDelimiter": "." + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "container", - "entityUrn": "urn:li:container:691314a7b63628684d62a14861d057a8", + "entityUrn": "urn:li:container:e7fe6fc9c3ca70e78694dcc5dd9c05b7", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { "json": { "customProperties": { "platform": "looker", + "instance": "ap-south-1", "env": "PROD", "folder_id": "shared-folder-id" }, @@ -22,7 +41,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:691314a7b63628684d62a14861d057a8", + "entityUrn": "urn:li:container:e7fe6fc9c3ca70e78694dcc5dd9c05b7", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -38,12 +57,13 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:691314a7b63628684d62a14861d057a8", + "entityUrn": "urn:li:container:e7fe6fc9c3ca70e78694dcc5dd9c05b7", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { "json": { - "platform": "urn:li:dataPlatform:looker" + "platform": "urn:li:dataPlatform:looker", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:looker,ap-south-1)" } }, "systemMetadata": { @@ -54,7 +74,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:691314a7b63628684d62a14861d057a8", + "entityUrn": "urn:li:container:e7fe6fc9c3ca70e78694dcc5dd9c05b7", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -72,12 +92,16 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:691314a7b63628684d62a14861d057a8", + "entityUrn": "urn:li:container:e7fe6fc9c3ca70e78694dcc5dd9c05b7", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:looker,ap-south-1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:looker,ap-south-1)" + }, { "id": "Folders" } @@ -93,7 +117,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.ChartSnapshot": { - "urn": "urn:li:chart:(looker,dashboard_elements.2)", + "urn": "urn:li:chart:(looker,ap-south-1.dashboard_elements.2)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -120,7 +144,7 @@ "chartUrl": "https://looker.company.com/x/", "inputs": [ { - "string": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)" + "string": "urn:li:dataset:(urn:li:dataPlatform:looker,ap-south-1.data.explore.my_view,PROD)" } ] } @@ -143,7 +167,7 @@ }, { "entityType": "chart", - "entityUrn": "urn:li:chart:(looker,dashboard_elements.2)", + "entityUrn": "urn:li:chart:(looker,ap-south-1.dashboard_elements.2)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -161,22 +185,43 @@ }, { "entityType": "chart", - "entityUrn": "urn:li:chart:(looker,dashboard_elements.2)", + "entityUrn": "urn:li:chart:(looker,ap-south-1.dashboard_elements.2)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:looker,ap-south-1)" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(looker,ap-south-1.dashboard_elements.2)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:looker,ap-south-1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:looker,ap-south-1)" + }, { "id": "Folders" }, { - "id": "urn:li:container:691314a7b63628684d62a14861d057a8", - "urn": "urn:li:container:691314a7b63628684d62a14861d057a8" + "id": "urn:li:container:e7fe6fc9c3ca70e78694dcc5dd9c05b7", + "urn": "urn:li:container:e7fe6fc9c3ca70e78694dcc5dd9c05b7" }, { - "id": "urn:li:dashboard:(looker,dashboards.1)", - "urn": "urn:li:dashboard:(looker,dashboards.1)" + "id": "urn:li:dashboard:(looker,ap-south-1.dashboards.1)", + "urn": "urn:li:dashboard:(looker,ap-south-1.dashboards.1)" } ] } @@ -190,7 +235,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DashboardSnapshot": { - "urn": "urn:li:dashboard:(looker,dashboards.1)", + "urn": "urn:li:dashboard:(looker,ap-south-1.dashboards.1)", "aspects": [ { "com.linkedin.pegasus2avro.dashboard.DashboardInfo": { @@ -198,7 +243,7 @@ "title": "foo", "description": "lorem ipsum", "charts": [ - "urn:li:chart:(looker,dashboard_elements.2)" + "urn:li:chart:(looker,ap-south-1.dashboard_elements.2)" ], "datasets": [], "lastModified": { @@ -237,12 +282,12 @@ }, { "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(looker,dashboards.1)", + "entityUrn": "urn:li:dashboard:(looker,ap-south-1.dashboards.1)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:691314a7b63628684d62a14861d057a8" + "container": "urn:li:container:e7fe6fc9c3ca70e78694dcc5dd9c05b7" } }, "systemMetadata": { @@ -253,7 +298,7 @@ }, { "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(looker,dashboards.1)", + "entityUrn": "urn:li:dashboard:(looker,ap-south-1.dashboards.1)", "changeType": "UPSERT", "aspectName": "embed", "aspect": { @@ -269,18 +314,39 @@ }, { "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(looker,dashboards.1)", + "entityUrn": "urn:li:dashboard:(looker,ap-south-1.dashboards.1)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:looker,ap-south-1)" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(looker,ap-south-1.dashboards.1)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:looker,ap-south-1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:looker,ap-south-1)" + }, { "id": "Folders" }, { - "id": "urn:li:container:691314a7b63628684d62a14861d057a8", - "urn": "urn:li:container:691314a7b63628684d62a14861d057a8" + "id": "urn:li:container:e7fe6fc9c3ca70e78694dcc5dd9c05b7", + "urn": "urn:li:container:e7fe6fc9c3ca70e78694dcc5dd9c05b7" } ] } @@ -293,14 +359,14 @@ }, { "entityType": "chart", - "entityUrn": "urn:li:chart:(looker,dashboard_elements.2)", + "entityUrn": "urn:li:chart:(looker,ap-south-1.dashboard_elements.2)", "changeType": "UPSERT", "aspectName": "inputFields", "aspect": { "json": { "fields": [ { - "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(looker,dashboard_elements.2),calc)", + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(looker,ap-south-1.dashboard_elements.2),calc)", "schemaField": { "fieldPath": "calc", "nullable": false, @@ -317,7 +383,7 @@ } }, { - "schemaFieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD),dim1)", + "schemaFieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,ap-south-1.data.explore.my_view,PROD),dim1)", "schemaField": { "fieldPath": "dim1", "nullable": false, @@ -351,14 +417,14 @@ }, { "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(looker,dashboards.1)", + "entityUrn": "urn:li:dashboard:(looker,ap-south-1.dashboards.1)", "changeType": "UPSERT", "aspectName": "inputFields", "aspect": { "json": { "fields": [ { - "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(looker,dashboard_elements.2),calc)", + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(looker,ap-south-1.dashboard_elements.2),calc)", "schemaField": { "fieldPath": "calc", "nullable": false, @@ -375,7 +441,7 @@ } }, { - "schemaFieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD),dim1)", + "schemaFieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,ap-south-1.data.explore.my_view,PROD),dim1)", "schemaField": { "fieldPath": "dim1", "nullable": false, @@ -409,13 +475,14 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:59a5aa45397364e6882e793f1bc77b42", + "entityUrn": "urn:li:container:63e49aaeb15b289d177acbb32625d577", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { "json": { "customProperties": { "platform": "looker", + "instance": "ap-south-1", "env": "PROD", "model_name": "data" }, @@ -430,7 +497,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:59a5aa45397364e6882e793f1bc77b42", + "entityUrn": "urn:li:container:63e49aaeb15b289d177acbb32625d577", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -446,12 +513,13 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:59a5aa45397364e6882e793f1bc77b42", + "entityUrn": "urn:li:container:63e49aaeb15b289d177acbb32625d577", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { "json": { - "platform": "urn:li:dataPlatform:looker" + "platform": "urn:li:dataPlatform:looker", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:looker,ap-south-1)" } }, "systemMetadata": { @@ -462,7 +530,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:59a5aa45397364e6882e793f1bc77b42", + "entityUrn": "urn:li:container:63e49aaeb15b289d177acbb32625d577", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -480,12 +548,16 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:59a5aa45397364e6882e793f1bc77b42", + "entityUrn": "urn:li:container:63e49aaeb15b289d177acbb32625d577", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:looker,ap-south-1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:looker,ap-south-1)" + }, { "id": "Explore" } @@ -501,7 +573,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,ap-south-1.data.explore.my_view,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.BrowsePaths": { @@ -535,7 +607,7 @@ "time": 1586847600000, "actor": "urn:li:corpuser:datahub" }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.underlying_view,PROD)", + "dataset": "urn:li:dataset:(urn:li:dataPlatform:looker,ap-south-1.lkml_samples.view.underlying_view,PROD)", "type": "VIEW" } ] @@ -597,7 +669,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,ap-south-1.data.explore.my_view,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -615,7 +687,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,ap-south-1.data.explore.my_view,PROD)", "changeType": "UPSERT", "aspectName": "embed", "aspect": { @@ -631,12 +703,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,ap-south-1.data.explore.my_view,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:59a5aa45397364e6882e793f1bc77b42" + "container": "urn:li:container:63e49aaeb15b289d177acbb32625d577" } }, "systemMetadata": { @@ -647,18 +719,22 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,ap-south-1.data.explore.my_view,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:looker,ap-south-1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:looker,ap-south-1)" + }, { "id": "Explore" }, { - "id": "urn:li:container:59a5aa45397364e6882e793f1bc77b42", - "urn": "urn:li:container:59a5aa45397364e6882e793f1bc77b42" + "id": "urn:li:container:63e49aaeb15b289d177acbb32625d577", + "urn": "urn:li:container:63e49aaeb15b289d177acbb32625d577" } ] } @@ -729,6 +805,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataPlatformInstance", + "entityUrn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:looker,ap-south-1)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "tag", "entityUrn": "urn:li:tag:Dimension", diff --git a/metadata-ingestion/tests/integration/looker/test_looker.py b/metadata-ingestion/tests/integration/looker/test_looker.py index e1cedee33dcb6d..fdc9c45fcf5396 100644 --- a/metadata-ingestion/tests/integration/looker/test_looker.py +++ b/metadata-ingestion/tests/integration/looker/test_looker.py @@ -94,6 +94,8 @@ def test_looker_ingest(pytestconfig, tmp_path, mock_time): "client_id": "foo", "client_secret": "bar", "extract_usage_history": False, + "platform_instance": "ap-south-1", + "include_platform_instance_in_urns": True, }, }, "sink": {