From a5b4877c72e8bb324f0e821b8dab51b557a4b47c Mon Sep 17 00:00:00 2001 From: treff7es Date: Thu, 1 Aug 2024 22:49:33 +0200 Subject: [PATCH] Update capabilities --- .../datahub/ingestion/source/abs/source.py | 5 ----- .../datahub/ingestion/source/csv_enricher.py | 7 ++++++- .../ingestion/source/dynamodb/dynamodb.py | 5 ----- .../src/datahub/ingestion/source/feast.py | 2 ++ .../datahub/ingestion/source/kafka_connect.py | 1 + .../src/datahub/ingestion/source/mode.py | 4 ++++ .../ingestion/source/powerbi/powerbi.py | 4 ++++ .../src/datahub/ingestion/source/pulsar.py | 1 + .../ingestion/source/qlik_sense/qlik_sense.py | 10 ++++++++++ .../src/datahub/ingestion/source/redash.py | 4 +++- .../ingestion/source/redshift/redshift.py | 4 +++- .../src/datahub/ingestion/source/s3/source.py | 7 +++---- .../datahub/ingestion/source/salesforce.py | 8 ++++++++ .../datahub/ingestion/source/sigma/sigma.py | 4 ++++ .../ingestion/source/sql/sql_common.py | 20 +++++++++++++++++++ .../src/datahub/ingestion/source/superset.py | 1 + .../datahub/ingestion/source/unity/source.py | 3 +++ 17 files changed, 73 insertions(+), 17 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/abs/source.py b/metadata-ingestion/src/datahub/ingestion/source/abs/source.py index c9833f69825998..39ebd79c2e2269 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/abs/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/abs/source.py @@ -198,11 +198,6 @@ class TableData: @support_status(SupportStatus.INCUBATING) @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration") @capability(SourceCapability.TAGS, "Can extract ABS object/container tags if enabled") -@capability( - SourceCapability.DELETION_DETECTION, - "Optionally enabled via `stateful_ingestion.remove_stale_metadata`", - supported=True, -) class ABSSource(StatefulIngestionSourceBase): source_config: DataLakeSourceConfig report: DataLakeSourceReport diff --git a/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py b/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py index feee89ba579837..e3f9a150ad0001 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py +++ b/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py @@ -12,11 +12,12 @@ from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.api.decorators import ( SupportStatus, + capability, config_class, platform_name, support_status, ) -from datahub.ingestion.api.source import Source, SourceReport +from datahub.ingestion.api.source import Source, SourceCapability, SourceReport from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.source_config.csv_enricher import CSVEnricherConfig from datahub.metadata.schema_classes import ( @@ -96,6 +97,10 @@ class CSVEnricherReport(SourceReport): @platform_name("CSV Enricher") @config_class(CSVEnricherConfig) @support_status(SupportStatus.INCUBATING) +@capability(SourceCapability.DOMAINS, "Supported by default") +@capability(SourceCapability.TAGS, "Supported by default") +@capability(SourceCapability.DESCRIPTIONS, "Supported by default") +@capability(SourceCapability.OWNERSHIP, "Supported by default") class CSVEnricherSource(Source): """ :::tip Looking to ingest a CSV data file into DataHub, as an asset? diff --git a/metadata-ingestion/src/datahub/ingestion/source/dynamodb/dynamodb.py b/metadata-ingestion/src/datahub/ingestion/source/dynamodb/dynamodb.py index 6cab0ffc8f25c5..acda656526ef53 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dynamodb/dynamodb.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dynamodb/dynamodb.py @@ -166,11 +166,6 @@ def report_dropped(self, name: str) -> None: SourceCapability.PLATFORM_INSTANCE, "By default, platform_instance will use the AWS account id", ) -@capability( - SourceCapability.DELETION_DETECTION, - "Optionally enabled via `stateful_ingestion.remove_stale_metadata`", - supported=True, -) class DynamoDBSource(StatefulIngestionSourceBase): """ This plugin extracts the following: diff --git a/metadata-ingestion/src/datahub/ingestion/source/feast.py b/metadata-ingestion/src/datahub/ingestion/source/feast.py index db0c8e9c39e7bf..e097fd1f221ea5 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/feast.py +++ b/metadata-ingestion/src/datahub/ingestion/source/feast.py @@ -96,6 +96,8 @@ class FeastRepositorySourceConfig(ConfigModel): @platform_name("Feast") @config_class(FeastRepositorySourceConfig) @support_status(SupportStatus.CERTIFIED) +@capability(SourceCapability.DESCRIPTIONS, "Enabled by default") +@capability(SourceCapability.SCHEMA_METADATA, "Enabled by default") @capability(SourceCapability.LINEAGE_COARSE, "Enabled by default") @dataclass class FeastRepositorySource(Source): diff --git a/metadata-ingestion/src/datahub/ingestion/source/kafka_connect.py b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect.py index 17047457e0eba0..266f9f6db57620 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/kafka_connect.py +++ b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect.py @@ -1125,6 +1125,7 @@ def transform_connector_config( @config_class(KafkaConnectSourceConfig) @support_status(SupportStatus.CERTIFIED) @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default") +@capability(SourceCapability.SCHEMA_METADATA, "Enabled by default") @capability(SourceCapability.LINEAGE_COARSE, "Enabled by default") class KafkaConnectSource(StatefulIngestionSourceBase): config: KafkaConnectSourceConfig diff --git a/metadata-ingestion/src/datahub/ingestion/source/mode.py b/metadata-ingestion/src/datahub/ingestion/source/mode.py index 4b4822bcb98cae..e8599894cdac90 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/mode.py +++ b/metadata-ingestion/src/datahub/ingestion/source/mode.py @@ -205,8 +205,12 @@ def report_dropped_space(self, ent_name: str) -> None: @platform_name("Mode") @config_class(ModeConfig) @support_status(SupportStatus.CERTIFIED) +@capability(SourceCapability.CONTAINERS, "Enabled by default") +@capability(SourceCapability.DESCRIPTIONS, "Enabled by default") @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default") @capability(SourceCapability.LINEAGE_COARSE, "Supported by default") +@capability(SourceCapability.LINEAGE_FINE, "Supported by default") +@capability(SourceCapability.OWNERSHIP, "Enabled by default") class ModeSource(StatefulIngestionSourceBase): """ diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py index 73f242a06b1d67..e0a72c71a1ef00 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py @@ -1220,8 +1220,12 @@ def report_to_datahub_work_units( @platform_name("PowerBI") @config_class(PowerBiDashboardSourceConfig) @support_status(SupportStatus.CERTIFIED) +@capability(SourceCapability.CONTAINERS, "Enabled by default") @capability(SourceCapability.DESCRIPTIONS, "Enabled by default") +@capability(SourceCapability.OWNERSHIP, "Enabled by default") @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default") +@capability(SourceCapability.SCHEMA_METADATA, "Enabled by default") +@capability(SourceCapability.TAGS, "Enabled by default") @capability( SourceCapability.OWNERSHIP, "Disabled by default, configured using `extract_ownership`", diff --git a/metadata-ingestion/src/datahub/ingestion/source/pulsar.py b/metadata-ingestion/src/datahub/ingestion/source/pulsar.py index 7671e239284305..790c1f918cdfd2 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/pulsar.py +++ b/metadata-ingestion/src/datahub/ingestion/source/pulsar.py @@ -91,6 +91,7 @@ def __init__(self, schema): @config_class(PulsarSourceConfig) @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default") @capability(SourceCapability.DOMAINS, "Supported via the `domain` config field") +@capability(SourceCapability.SCHEMA_METADATA, "Enabled by default") @dataclass class PulsarSource(StatefulIngestionSourceBase): def __init__(self, config: PulsarSourceConfig, ctx: PipelineContext): diff --git a/metadata-ingestion/src/datahub/ingestion/source/qlik_sense/qlik_sense.py b/metadata-ingestion/src/datahub/ingestion/source/qlik_sense/qlik_sense.py index b9fd2a9c4fe221..b6c48dd3c488ec 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/qlik_sense/qlik_sense.py +++ b/metadata-ingestion/src/datahub/ingestion/source/qlik_sense/qlik_sense.py @@ -93,12 +93,22 @@ @platform_name("Qlik Sense") @config_class(QlikSourceConfig) @support_status(SupportStatus.INCUBATING) +@capability(SourceCapability.CONTAINERS, "Enabled by default") @capability(SourceCapability.DESCRIPTIONS, "Enabled by default") +@capability( + SourceCapability.LINEAGE_COARSE, + "Enabled by default.", +) +@capability( + SourceCapability.LINEAGE_FINE, + "Disabled by default. ", +) @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default") @capability( SourceCapability.OWNERSHIP, "Enabled by default, configured using `ingest_owner`", ) +@capability(SourceCapability.SCHEMA_METADATA, "Enabled by default") class QlikSenseSource(StatefulIngestionSourceBase, TestableSource): """ This plugin extracts the following: diff --git a/metadata-ingestion/src/datahub/ingestion/source/redash.py b/metadata-ingestion/src/datahub/ingestion/source/redash.py index c7a3f25e947dc5..38cf0bebcbc12f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redash.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redash.py @@ -18,12 +18,13 @@ from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.api.decorators import ( # SourceCapability,; capability, SupportStatus, + capability, config_class, platform_name, support_status, ) from datahub.ingestion.api.registry import import_path -from datahub.ingestion.api.source import Source, SourceReport +from datahub.ingestion.api.source import Source, SourceCapability, SourceReport from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.metadata.com.linkedin.pegasus2avro.common import ( AuditStamp, @@ -308,6 +309,7 @@ def report_dropped(self, item: str) -> None: @platform_name("Redash") @config_class(RedashConfig) @support_status(SupportStatus.INCUBATING) +@capability(SourceCapability.LINEAGE_COARSE, "Enabled by default") class RedashSource(Source): """ This plugin extracts the following: diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py index a6ffed65aaa70c..a9fc9ab8f3e993 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py @@ -120,7 +120,7 @@ @platform_name("Redshift") @config_class(RedshiftConfig) @support_status(SupportStatus.CERTIFIED) -@capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default") +@capability(SourceCapability.CONTAINERS, "Enabled by default") @capability(SourceCapability.DOMAINS, "Supported via the `domain` config field") @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration") @capability(SourceCapability.DESCRIPTIONS, "Enabled by default") @@ -129,6 +129,8 @@ SourceCapability.LINEAGE_FINE, "Optionally enabled via configuration (`mixed` or `sql_based` lineage needs to be enabled)", ) +@capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default") +@capability(SourceCapability.SCHEMA_METADATA, "Enabled by default") @capability( SourceCapability.USAGE_STATS, "Enabled by default, can be disabled via configuration `include_usage_statistics`", diff --git a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py index c35f500df1b8c7..b8c7fd5aa88fc1 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py @@ -220,13 +220,12 @@ class TableData: @platform_name("S3 / Local Files", id="s3") @config_class(DataLakeSourceConfig) @support_status(SupportStatus.INCUBATING) +@capability(SourceCapability.CONTAINERS, "Enabled by default") @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration") -@capability(SourceCapability.TAGS, "Can extract S3 object/bucket tags if enabled") @capability( - SourceCapability.DELETION_DETECTION, - "Optionally enabled via `stateful_ingestion.remove_stale_metadata`", - supported=True, + SourceCapability.SCHEMA_METADATA, "Can infer schema from supported file types" ) +@capability(SourceCapability.TAGS, "Can extract S3 object/bucket tags if enabled") class S3Source(StatefulIngestionSourceBase): source_config: DataLakeSourceConfig report: DataLakeSourceReport diff --git a/metadata-ingestion/src/datahub/ingestion/source/salesforce.py b/metadata-ingestion/src/datahub/ingestion/source/salesforce.py index 946fdcedc571f8..42128123c61442 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/salesforce.py +++ b/metadata-ingestion/src/datahub/ingestion/source/salesforce.py @@ -199,6 +199,14 @@ def report_dropped(self, ent_name: str) -> None: description="Not supported yet", supported=False, ) +@capability( + capability_name=SourceCapability.SCHEMA_METADATA, + description="Enabled by default", +) +@capability( + capability_name=SourceCapability.TAGS, + description="Enabled by default", +) class SalesforceSource(Source): base_url: str config: SalesforceConfig diff --git a/metadata-ingestion/src/datahub/ingestion/source/sigma/sigma.py b/metadata-ingestion/src/datahub/ingestion/source/sigma/sigma.py index 74d7abb121a3eb..5db5e543510db9 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sigma/sigma.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sigma/sigma.py @@ -86,8 +86,12 @@ @platform_name("Sigma") @config_class(SigmaSourceConfig) @support_status(SupportStatus.INCUBATING) +@capability(SourceCapability.CONTAINERS, "Enabled by default") @capability(SourceCapability.DESCRIPTIONS, "Enabled by default") +@capability(SourceCapability.LINEAGE_COARSE, "Enabled by default.") @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default") +@capability(SourceCapability.SCHEMA_METADATA, "Enabled by default") +@capability(SourceCapability.TAGS, "Enabled by default") @capability( SourceCapability.OWNERSHIP, "Enabled by default, configured using `ingest_owner`", diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py index b3a5f134c61d64..de3012cc335681 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py @@ -320,6 +320,26 @@ class ProfileMetadata: "Optionally enabled via `classification.enabled`", supported=True, ) +@capability( + SourceCapability.SCHEMA_METADATA, + "Enabled by default", + supported=True, +) +@capability( + SourceCapability.CONTAINERS, + "Enabled by default", + supported=True, +) +@capability( + SourceCapability.DESCRIPTIONS, + "Enabled by default", + supported=True, +) +@capability( + SourceCapability.DOMAINS, + "Enabled by default", + supported=True, +) class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource): """A Base class for all SQL Sources that use SQLAlchemy to extend""" diff --git a/metadata-ingestion/src/datahub/ingestion/source/superset.py b/metadata-ingestion/src/datahub/ingestion/source/superset.py index dd2dc3301d80ea..0656f13feba64b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/superset.py +++ b/metadata-ingestion/src/datahub/ingestion/source/superset.py @@ -158,6 +158,7 @@ def get_filter_name(filter_obj): @capability( SourceCapability.DELETION_DETECTION, "Optionally enabled via stateful_ingestion" ) +@capability(SourceCapability.DOMAINS, "Enabled by `domain` config to assign domain_key") @capability(SourceCapability.LINEAGE_COARSE, "Supported by default") class SupersetSource(StatefulIngestionSourceBase): """ diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/source.py b/metadata-ingestion/src/datahub/ingestion/source/unity/source.py index b29170cb2d705d..9a6cde78cf10d3 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/source.py @@ -140,6 +140,9 @@ @capability(SourceCapability.DOMAINS, "Supported via the `domain` config field") @capability(SourceCapability.CONTAINERS, "Enabled by default") @capability(SourceCapability.OWNERSHIP, "Supported via the `include_ownership` config") +@capability( + SourceCapability.DATA_PROFILING, "Supported via the `profiling.enabled` config" +) @capability( SourceCapability.DELETION_DETECTION, "Optionally enabled via `stateful_ingestion.remove_stale_metadata`",