From 52ed0a1e5d5b7b7dea971aaede9a8d162f3b47f1 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Thu, 18 Jul 2024 16:11:48 -0700 Subject: [PATCH 1/8] feat(ingest): improve `ingest deploy` command Makes it easier to actually use ingest deploy programmatically by autogenerating recipe urns when not specified. --- docs/cli.md | 29 +++-- .../src/datahub/cli/ingest_cli.py | 116 ++++++++++-------- 2 files changed, 78 insertions(+), 67 deletions(-) diff --git a/docs/cli.md b/docs/cli.md index ca660f209ec42d..3b02826118f74e 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -102,6 +102,7 @@ Command Options: --test-source-connection When set, ingestion will only test the source connection details from the recipe --no-progress If enabled, mute intermediate progress ingestion reports ``` + #### ingest --dry-run The `--dry-run` option of the `ingest` command performs all of the ingestion steps, except writing to the sink. This is useful to validate that the @@ -139,17 +140,23 @@ The `ingest deploy` command instructs the cli to upload an ingestion recipe to D This command can also be used to schedule the ingestion while uploading or even to update existing sources. It will upload to the remote instance the CLI is connected to, not the sink of the recipe. Use `datahub init` to set the remote if not already set. -To schedule a recipe called "test", to run at 5am everyday, London time with the recipe configured in a local `recipe.yaml` file: -````shell -datahub ingest deploy --name "test" --schedule "5 * * * *" --time-zone "Europe/London" -c recipe.yaml -```` +To schedule a recipe called "Snowflake Integration", to run at 5am everyday, London time with the recipe configured in a local `recipe.yaml` file: + +```shell +datahub ingest deploy --name "Snowflake Integration" --schedule "5 * * * *" --time-zone "Europe/London" -c recipe.yaml +``` -To update an existing recipe please use the `--urn` parameter to specify the id of the recipe to update. 
+By default, the ingestion recipe's identifier is generated by hashing the name. +You can specify the recipe name by passing the `--name` flag to the CLI, or by setting `deployment_name` in the recipe. + +This command will automatically create a new recipe if it doesn't exist, or update it if it does. +You can override the urn generation by passing the `--urn` flag to the CLI. **Note:** Updating a recipe will result in a replacement of the existing options with what was specified in the cli command. I.e: Not specifying a schedule in the cli update command will remove the schedule from the recipe to be updated. #### ingest --no-default-report + By default, the cli sends an ingestion report to DataHub, which allows you to see the result of all cli-based ingestion in the UI. This can be turned off with the `--no-default-report` flag. ```shell @@ -253,7 +260,6 @@ true false ``` - ### get The `get` command allows you to easily retrieve metadata from DataHub, by using the REST API. This works for both versioned aspects and timeseries aspects. For timeseries aspects, it fetches the latest value. @@ -314,6 +320,7 @@ Update succeeded with status 200 ``` #### put platform + **🤝 Version Compatibility:** `acryl-datahub>0.8.44.4` The **put platform** command instructs `datahub` to create or update metadata about a data platform. This is very useful if you are using a custom data platform, to set up its logo and display name for a native UI experience. @@ -346,6 +353,7 @@ datahub timeline --urn "urn:li:dataset:(urn:li:dataPlatform:mysql,User.UserAccou The `dataset` command allows you to interact with the dataset entity. The `get` operation can be used to read in a dataset into a yaml file. 
+ ```shell datahub dataset get --urn "$URN" --to-file "$FILE_NAME" ``` @@ -358,7 +366,6 @@ datahub dataset upsert -f dataset.yaml An example of `dataset.yaml` would look like as in [dataset.yaml](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/cli_usage/dataset/dataset.yaml). - ### user (User Entity) The `user` command allows you to interact with the User entity. @@ -411,7 +418,6 @@ members: display_name: "Joe's Hub" ``` - ### dataproduct (Data Product Entity) **🤝 Version Compatibility:** `acryl-datahub>=0.10.2.4` @@ -566,14 +572,12 @@ Use this to delete a Data Product from DataHub. Default to `--soft` which preser # > datahub dataproduct delete --urn "urn:li:dataProduct:pet_of_the_week" --hard ``` - ## Miscellaneous Admin Commands ### lite (experimental) The lite group of commands allow you to run an embedded, lightweight DataHub instance for command line exploration of your metadata. This is intended more for developer tool oriented usage rather than as a production server instance for DataHub. See [DataHub Lite](./datahub_lite.md) for more information about how you can ingest metadata into DataHub Lite and explore your metadata easily. - ### telemetry To help us understand how people are using DataHub, we collect anonymous usage statistics on actions such as command invocations via Mixpanel. @@ -640,7 +644,6 @@ External Entities Affected: None Old Entities Migrated = {'urn:li:dataset:(urn:li:dataPlatform:hive,logging_events,PROD)', 'urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)', 'urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_deleted,PROD)', 'urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)'} ``` - ## Alternate Installation Options ### Using docker @@ -673,7 +676,7 @@ We use a plugin architecture so that you can install only the dependencies you a Please see our [Integrations page](https://datahubproject.io/integrations) if you want to filter on the features offered by each source. 
| Plugin Name | Install Command | Provides | -|------------------------------------------------------------------------------------------------| ---------------------------------------------------------- | --------------------------------------- | +| ---------------------------------------------------------------------------------------------- | ---------------------------------------------------------- | --------------------------------------- | | [metadata-file](./generated/ingestion/sources/metadata-file.md) | _included by default_ | File source and sink | | [athena](./generated/ingestion/sources/athena.md) | `pip install 'acryl-datahub[athena]'` | AWS Athena source | | [bigquery](./generated/ingestion/sources/bigquery.md) | `pip install 'acryl-datahub[bigquery]'` | BigQuery source | @@ -715,7 +718,7 @@ Please see our [Integrations page](https://datahubproject.io/integrations) if yo ### Sinks | Plugin Name | Install Command | Provides | -|-------------------------------------------------------------------| -------------------------------------------- | -------------------------- | +| ----------------------------------------------------------------- | -------------------------------------------- | -------------------------- | | [metadata-file](../metadata-ingestion/sink_docs/metadata-file.md) | _included by default_ | File source and sink | | [console](../metadata-ingestion/sink_docs/console.md) | _included by default_ | Console sink | | [datahub-rest](../metadata-ingestion/sink_docs/datahub.md) | `pip install 'acryl-datahub[datahub-rest]'` | DataHub sink over REST API | diff --git a/metadata-ingestion/src/datahub/cli/ingest_cli.py b/metadata-ingestion/src/datahub/cli/ingest_cli.py index bb8d67f8439ab0..157b194c057e93 100644 --- a/metadata-ingestion/src/datahub/cli/ingest_cli.py +++ b/metadata-ingestion/src/datahub/cli/ingest_cli.py @@ -17,6 +17,7 @@ from datahub.cli import cli_utils from datahub.cli.config_utils import CONDENSED_DATAHUB_CONFIG_PATH from 
datahub.configuration.config_loader import load_config_file +from datahub.emitter.mce_builder import datahub_guid from datahub.ingestion.graph.client import get_default_graph from datahub.ingestion.run.connection import ConnectionManager from datahub.ingestion.run.pipeline import Pipeline @@ -204,6 +205,15 @@ async def run_ingestion_and_check_upgrade() -> int: # don't raise SystemExit if there's no error +def _make_ingestion_urn(name: str) -> str: + guid = datahub_guid( + { + "name": name, + } + ) + return f"urn:li:dataHubIngestionSource:deploy-{guid}" + + @ingest.command() @upgrade.check_upgrade @telemetry.with_telemetry() @@ -212,7 +222,6 @@ async def run_ingestion_and_check_upgrade() -> int: "--name", type=str, help="Recipe Name", - required=True, ) @click.option( "-c", @@ -224,7 +233,7 @@ async def run_ingestion_and_check_upgrade() -> int: @click.option( "--urn", type=str, - help="Urn of recipe to update. Creates recipe if provided urn does not exist", + help="Urn of recipe to update. If not specified here or in the recipe's pipeline_name, this will create a new ingestion source.", required=False, ) @click.option( @@ -256,7 +265,7 @@ async def run_ingestion_and_check_upgrade() -> int: default="UTC", ) def deploy( - name: str, + name: Optional[str], config: str, urn: Optional[str], executor_id: str, @@ -280,6 +289,31 @@ def deploy( resolve_env_vars=False, ) + deployment_name = pipeline_config.pop("deployment_name", None) + if deployment_name: + if urn: + raise click.UsageError("Cannot specify both --urn and deployment_name") + elif name: + raise click.UsageError("Cannot specify both --name and deployment_name") + + # When urn/name is not specified, we will generate a unique urn based on the deployment name. + name = deployment_name + urn = _make_ingestion_urn(name) + logger.info(f"Will create or update a recipe with urn: {urn}") + elif name: + if not urn: + # When the urn is not specified, generate an urn based on the name. 
+ urn = _make_ingestion_urn(name) + logger.info( + f"No urn was explicitly specified, will create or update the recipe with urn: {urn}" + ) + else: # neither deployment_name nor name is set + raise click.UsageError( + "Either --name must be set or deployment_name specified in the config" + ) + + # Invariant - at this point, both urn and name are set. + variables: dict = { "urn": urn, "name": name, @@ -292,57 +326,31 @@ def deploy( if schedule is not None: variables["schedule"] = {"interval": schedule, "timezone": time_zone} - if urn: - - graphql_query: str = textwrap.dedent( - """ - mutation updateIngestionSource( - $urn: String!, - $name: String!, - $type: String!, - $schedule: UpdateIngestionSourceScheduleInput, - $recipe: String!, - $executorId: String! - $version: String) { - - updateIngestionSource(urn: $urn, input: { - name: $name, - type: $type, - schedule: $schedule, - config: { - recipe: $recipe, - executorId: $executorId, - version: $version, - } - }) - } - """ - ) - else: - logger.info("No URN specified recipe urn, will create a new recipe.") - graphql_query = textwrap.dedent( - """ - mutation createIngestionSource( - $name: String!, - $type: String!, - $schedule: UpdateIngestionSourceScheduleInput, - $recipe: String!, - $executorId: String!, - $version: String) { - - createIngestionSource(input: { - name: $name, - type: $type, - schedule: $schedule, - config: { - recipe: $recipe, - executorId: $executorId, - version: $version, - } - }) - } - """ - ) + # The updateIngestionSource endpoint can actually do upserts as well. + graphql_query: str = textwrap.dedent( + """ + mutation updateIngestionSource( + $urn: String!, + $name: String!, + $type: String!, + $schedule: UpdateIngestionSourceScheduleInput, + $recipe: String!, + $executorId: String! 
+ $version: String) { + + updateIngestionSource(urn: $urn, input: { + name: $name, + type: $type, + schedule: $schedule, + config: { + recipe: $recipe, + executorId: $executorId, + version: $version, + } + }) + } + """ + ) response = datahub_graph.execute_graphql(graphql_query, variables=variables) From d1e53ceaa513180cc212a05cb29e4e0cc3bd95c8 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Fri, 19 Jul 2024 15:13:23 -0700 Subject: [PATCH 2/8] expand deployment field capabilities --- docs/cli.md | 70 +++++++++++++------ .../src/datahub/cli/ingest_cli.py | 57 +++++++++++---- 2 files changed, 92 insertions(+), 35 deletions(-) diff --git a/docs/cli.md b/docs/cli.md index 3b02826118f74e..253f3ac4a0703d 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -134,27 +134,6 @@ By default `--preview` creates 10 workunits. But if you wish to try producing mo datahub ingest -c ./examples/recipes/example_to_datahub_rest.dhub.yaml -n --preview --preview-workunits=20 ``` -#### ingest deploy - -The `ingest deploy` command instructs the cli to upload an ingestion recipe to DataHub to be run by DataHub's [UI Ingestion](./ui-ingestion.md). -This command can also be used to schedule the ingestion while uploading or even to update existing sources. It will upload to the remote instance the -CLI is connected to, not the sink of the recipe. Use `datahub init` to set the remote if not already set. - -To schedule a recipe called "Snowflake Integration", to run at 5am everyday, London time with the recipe configured in a local `recipe.yaml` file: - -```shell -datahub ingest deploy --name "Snowflake Integration" --schedule "5 * * * *" --time-zone "Europe/London" -c recipe.yaml -``` - -By default, the ingestion recipe's identifier is generated by hashing the name. -You can specify the recipe name by passing the `--name` flag to the CLI, or by setting `deployment_name` in the recipe. - -This command will automatically create a new recipe if it doesn't exist, or update it if it does. 
-You can override the urn generation by passing the `--urn` flag to the CLI. - -**Note:** Updating a recipe will result in a replacement of the existing options with what was specified in the cli command. -I.e: Not specifying a schedule in the cli update command will remove the schedule from the recipe to be updated. - #### ingest --no-default-report By default, the cli sends an ingestion report to DataHub, which allows you to see the result of all cli-based ingestion in the UI. This can be turned off with the `--no-default-report` flag. @@ -187,6 +166,53 @@ failure_log: filename: ./path/to/failure.json ``` +### ingest deploy + +The `ingest deploy` command instructs the cli to upload an ingestion recipe to DataHub to be run by DataHub's [UI Ingestion](./ui-ingestion.md). +This command can also be used to schedule the ingestion while uploading or even to update existing sources. It will upload to the remote instance the +CLI is connected to, not the sink of the recipe. Use `datahub init` to set the remote if not already set. + +This command will automatically create a new recipe if it doesn't exist, or update it if it does. +Note that this is a complete update, and will remove any options that were previously set. +I.e: Not specifying a schedule in the cli update command will remove the schedule from the recipe to be updated. + +**Basic example** + +To schedule a recipe called "Snowflake Integration", to run at 5am everyday, London time with the recipe configured in a local `recipe.yaml` file: + +```shell +datahub ingest deploy --name "Snowflake Integration" --schedule "5 * * * *" --time-zone "Europe/London" -c recipe.yaml +``` + +By default, the ingestion recipe's identifier is generated by hashing the name. +You can override the urn generation by passing the `--urn` flag to the CLI. + +**Using `deployment` to avoid CLI args** + +As an alternative to configuring settings from the CLI, all of these settings can also be set in the `deployment` field of the recipe. 
+ +```yml +# deployment_recipe.yml +deployment: + name: "Snowflake Integration" + schedule: "5 * * * *" + time_zone: "Europe/London" + +source: ... +``` + +```shell +datahub ingest deploy -c deployment_recipe.yml +# Note that when deployment options are specified in the recipe, all other CLI options are ignored. +``` + +This can be particularly useful when you want all recipes should be stored in version control. + +```shell +# Deploy every yml recipe in a directory +ls recipe_directory/*.yml | xargs -n 1 -I {} datahub ingest deploy -c {} +``` + ### init The init command is used to tell `datahub` about where your DataHub instance is located. The CLI will point to localhost DataHub by default. @@ -249,8 +275,6 @@ The [metadata deletion guide](./how/delete-metadata.md) covers the various optio ### exists -**🤝 Version compatibility** : `acryl-datahub>=0.10.2.4` - The exists command can be used to check if an entity exists in DataHub. ```shell diff --git a/metadata-ingestion/src/datahub/cli/ingest_cli.py b/metadata-ingestion/src/datahub/cli/ingest_cli.py index 157b194c057e93..4d326ca02bf9a1 100644 --- a/metadata-ingestion/src/datahub/cli/ingest_cli.py +++ b/metadata-ingestion/src/datahub/cli/ingest_cli.py @@ -16,6 +16,7 @@ import datahub as datahub_package from datahub.cli import cli_utils from datahub.cli.config_utils import CONDENSED_DATAHUB_CONFIG_PATH +from datahub.configuration.common import ConfigModel from datahub.configuration.config_loader import load_config_file from datahub.emitter.mce_builder import datahub_guid from datahub.ingestion.graph.client import get_default_graph @@ -214,6 +215,14 @@ def _make_ingestion_urn(name: str) -> str: return f"urn:li:dataHubIngestionSource:deploy-{guid}" +class DeployOptions(ConfigModel): + name: str + schedule: Optional[str] = None + time_zone: str = "UTC" + cli_version: Optional[str] = None + executor_id: str = "default" + + @ingest.command() @upgrade.check_upgrade @telemetry.with_telemetry() @@ -289,16 +298,27 @@ def 
deploy( resolve_env_vars=False, ) - deployment_name = pipeline_config.pop("deployment_name", None) - if deployment_name: + deploy_options_raw = pipeline_config.pop("deployment", None) + if deploy_options_raw is not None: + deploy_options = DeployOptions.parse_obj(deploy_options_raw) + + logger.info(f"Using {repr(deploy_options)}") + if urn: - raise click.UsageError("Cannot specify both --urn and deployment_name") + raise click.UsageError( + "Cannot specify both --urn and deployment field in config" + ) elif name: - raise click.UsageError("Cannot specify both --name and deployment_name") + raise click.UsageError( + "Cannot specify both --name and deployment field in config" + ) + else: + logger.info( + "The deployment field is set in the recipe, any CLI args will be ignored" + ) # When urn/name is not specified, we will generate a unique urn based on the deployment name. - name = deployment_name - urn = _make_ingestion_urn(name) + urn = _make_ingestion_urn(deploy_options.name) logger.info(f"Will create or update a recipe with urn: {urn}") elif name: if not urn: @@ -307,24 +327,37 @@ def deploy( logger.info( f"No urn was explicitly specified, will create or update the recipe with urn: {urn}" ) + + deploy_options = DeployOptions( + name=name, + schedule=schedule, + time_zone=time_zone, + cli_version=cli_version, + executor_id=executor_id, + ) + + logger.info(f"Using {repr(deploy_options)}") else: # neither deployment_name nor name is set raise click.UsageError( "Either --name must be set or deployment_name specified in the config" ) - # Invariant - at this point, both urn and name are set. + # Invariant - at this point, both urn and deploy_options are set. 
variables: dict = { "urn": urn, - "name": name, + "name": deploy_options.name, "type": pipeline_config["source"]["type"], "recipe": json.dumps(pipeline_config), - "executorId": executor_id, - "version": cli_version, + "executorId": deploy_options.executor_id, + "version": deploy_options.cli_version, } - if schedule is not None: - variables["schedule"] = {"interval": schedule, "timezone": time_zone} + if deploy_options.schedule is not None: + variables["schedule"] = { + "interval": deploy_options.schedule, + "timezone": deploy_options.time_zone, + } # The updateIngestionSource endpoint can actually do upserts as well. graphql_query: str = textwrap.dedent( From 8bfe7e333b20430dae9d8b91ebbe6bc6ec502c07 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Fri, 19 Jul 2024 15:20:01 -0700 Subject: [PATCH 3/8] support description field --- metadata-ingestion/src/datahub/cli/ingest_cli.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/metadata-ingestion/src/datahub/cli/ingest_cli.py b/metadata-ingestion/src/datahub/cli/ingest_cli.py index 4d326ca02bf9a1..693791c6a7f406 100644 --- a/metadata-ingestion/src/datahub/cli/ingest_cli.py +++ b/metadata-ingestion/src/datahub/cli/ingest_cli.py @@ -217,6 +217,7 @@ def _make_ingestion_urn(name: str) -> str: class DeployOptions(ConfigModel): name: str + description: Optional[str] = None schedule: Optional[str] = None time_zone: str = "UTC" cli_version: Optional[str] = None @@ -232,6 +233,12 @@ class DeployOptions(ConfigModel): type=str, help="Recipe Name", ) +@click.option( + "--description", + type=str, + help="Recipe description", + required=False, +) @click.option( "-c", "--config", @@ -275,6 +282,7 @@ class DeployOptions(ConfigModel): ) def deploy( name: Optional[str], + description: Optional[str], config: str, urn: Optional[str], executor_id: str, @@ -330,6 +338,7 @@ def deploy( deploy_options = DeployOptions( name=name, + description=description, schedule=schedule, time_zone=time_zone, cli_version=cli_version, @@ 
-347,6 +356,7 @@ def deploy( variables: dict = { "urn": urn, "name": deploy_options.name, + "description": deploy_options.description, "type": pipeline_config["source"]["type"], "recipe": json.dumps(pipeline_config), "executorId": deploy_options.executor_id, @@ -365,6 +375,7 @@ def deploy( mutation updateIngestionSource( $urn: String!, $name: String!, + $description: String, $type: String!, $schedule: UpdateIngestionSourceScheduleInput, $recipe: String!, @@ -373,6 +384,7 @@ def deploy( updateIngestionSource(urn: $urn, input: { name: $name, + description: $description, type: $type, schedule: $schedule, config: { From 2ffbb1a6254717de5ffffe71b14e7c862c442d21 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Fri, 19 Jul 2024 15:20:34 -0700 Subject: [PATCH 4/8] fix doc --- docs/cli.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/cli.md b/docs/cli.md index 253f3ac4a0703d..0cf140c4e36790 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -178,8 +178,8 @@ I.e: Not specifying a schedule in the cli update command will remove the schedul **Basic example** -To schedule a recipe called "Snowflake Integration", to run at 5am everyday, London time with the recipe configured in a local `recipe.yaml` file: +To schedule a recipe called "Snowflake Integration" to run at 5am every day (London time), using the recipe configured in a local `recipe.yaml` file: ```shell -datahub ingest deploy --name "Snowflake Integration" --schedule "5 * * * *" --time-zone "Europe/London" -c recipe.yaml +datahub ingest deploy --name "Snowflake Integration" --schedule "0 5 * * *" --time-zone "Europe/London" -c recipe.yaml ``` From bfcb59da9e99ce78b42240460260d7a5a105a83f Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Wed, 24 Jul 2024 17:59:37 -0700 Subject: [PATCH 5/8] cli override --- docs/cli.md | 1 - .../src/datahub/cli/ingest_cli.py | 65 ++++++++----------- 2 files changed, 28 insertions(+), 38 deletions(-) diff --git a/docs/cli.md b/docs/cli.md index 0cf140c4e36790..543405efee9ac3 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -203,7 +203,6 @@ source: ...
```shell datahub ingest deploy -c deployment_recipe.yml -# Note that when deployment options are specified in the recipe, all other CLI options are ignored. ``` This can be particularly useful when you want all recipes should be stored in version control. diff --git a/metadata-ingestion/src/datahub/cli/ingest_cli.py b/metadata-ingestion/src/datahub/cli/ingest_cli.py index 693791c6a7f406..c4672322c7efb9 100644 --- a/metadata-ingestion/src/datahub/cli/ingest_cli.py +++ b/metadata-ingestion/src/datahub/cli/ingest_cli.py @@ -310,46 +310,37 @@ def deploy( if deploy_options_raw is not None: deploy_options = DeployOptions.parse_obj(deploy_options_raw) - logger.info(f"Using {repr(deploy_options)}") - - if urn: - raise click.UsageError( - "Cannot specify both --urn and deployment field in config" - ) - elif name: + if name: + logger.info(f"Overriding deployment name {deploy_options.name} with {name}") + deploy_options.name = name + else: + if not name: raise click.UsageError( - "Cannot specify both --name and deployment field in config" - ) - else: - logger.info( - "The deployment field is set in the recipe, any CLI args will be ignored" + "Either --name must be set or deployment_name specified in the config" ) - + deploy_options = DeployOptions(name=name) + + # Use remaining CLI args to override deploy_options + if description: + deploy_options.description = description + if schedule: + deploy_options.schedule = schedule + if time_zone: + deploy_options.time_zone = time_zone + if cli_version: + deploy_options.cli_version = cli_version + if executor_id: + deploy_options.executor_id = executor_id + + logger.info(f"Using {repr(deploy_options)}") + + if deploy_options.description: + logger.warning("Description was set, but it is not shown anywhere in the UI") + + if not urn: # When urn/name is not specified, we will generate a unique urn based on the deployment name. 
urn = _make_ingestion_urn(deploy_options.name) - logger.info(f"Will create or update a recipe with urn: {urn}") - elif name: - if not urn: - # When the urn is not specified, generate an urn based on the name. - urn = _make_ingestion_urn(name) - logger.info( - f"No urn was explicitly specified, will create or update the recipe with urn: {urn}" - ) - - deploy_options = DeployOptions( - name=name, - description=description, - schedule=schedule, - time_zone=time_zone, - cli_version=cli_version, - executor_id=executor_id, - ) - - logger.info(f"Using {repr(deploy_options)}") - else: # neither deployment_name nor name is set - raise click.UsageError( - "Either --name must be set or deployment_name specified in the config" - ) + logger.info(f"Using recipe urn: {urn}") # Invariant - at this point, both urn and deploy_options are set. @@ -400,7 +391,7 @@ def deploy( response = datahub_graph.execute_graphql(graphql_query, variables=variables) click.echo( - f"✅ Successfully wrote data ingestion source metadata for recipe {name}:" + f"✅ Successfully wrote data ingestion source metadata for recipe {deploy_options.name}:" ) click.echo(response) From 9a9b10dbfab08ac51e9779a4e99f9b8e980bc0c4 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Wed, 24 Jul 2024 18:01:06 -0700 Subject: [PATCH 6/8] fix typo --- docs/cli.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/cli.md b/docs/cli.md index 543405efee9ac3..c2ad284092d91b 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -205,7 +205,7 @@ source: ... datahub ingest deploy -c deployment_recipe.yml ``` -This can be particularly useful when you want all recipes should be stored in version control. +This is particularly useful when you want all recipes to be stored in version control. 
```shell # Deploy every yml recipe in a directory From c83362988c75b963bdc7451d2aab3c2efbd6f2c7 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Thu, 25 Jul 2024 17:34:43 -0700 Subject: [PATCH 7/8] remove description --- metadata-ingestion/src/datahub/cli/ingest_cli.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/metadata-ingestion/src/datahub/cli/ingest_cli.py b/metadata-ingestion/src/datahub/cli/ingest_cli.py index c4672322c7efb9..346647b9c844c3 100644 --- a/metadata-ingestion/src/datahub/cli/ingest_cli.py +++ b/metadata-ingestion/src/datahub/cli/ingest_cli.py @@ -217,7 +217,6 @@ def _make_ingestion_urn(name: str) -> str: class DeployOptions(ConfigModel): name: str - description: Optional[str] = None schedule: Optional[str] = None time_zone: str = "UTC" cli_version: Optional[str] = None @@ -233,12 +232,6 @@ class DeployOptions(ConfigModel): type=str, help="Recipe Name", ) -@click.option( - "--description", - type=str, - help="Recipe description", - required=False, -) @click.option( "-c", "--config", @@ -282,7 +275,6 @@ class DeployOptions(ConfigModel): ) def deploy( name: Optional[str], - description: Optional[str], config: str, urn: Optional[str], executor_id: str, @@ -321,8 +313,6 @@ def deploy( deploy_options = DeployOptions(name=name) # Use remaining CLI args to override deploy_options - if description: - deploy_options.description = description if schedule: deploy_options.schedule = schedule if time_zone: @@ -334,9 +324,6 @@ def deploy( logger.info(f"Using {repr(deploy_options)}") - if deploy_options.description: - logger.warning("Description was set, but it is not shown anywhere in the UI") - if not urn: # When urn/name is not specified, we will generate a unique urn based on the deployment name. 
urn = _make_ingestion_urn(deploy_options.name) From 2914870a1bc55727dd045daf493632e10c798b93 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Thu, 25 Jul 2024 18:10:37 -0700 Subject: [PATCH 8/8] tweak --- metadata-ingestion/src/datahub/cli/ingest_cli.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/metadata-ingestion/src/datahub/cli/ingest_cli.py b/metadata-ingestion/src/datahub/cli/ingest_cli.py index 346647b9c844c3..d8f30d8eaf443b 100644 --- a/metadata-ingestion/src/datahub/cli/ingest_cli.py +++ b/metadata-ingestion/src/datahub/cli/ingest_cli.py @@ -334,7 +334,6 @@ def deploy( variables: dict = { "urn": urn, "name": deploy_options.name, - "description": deploy_options.description, "type": pipeline_config["source"]["type"], "recipe": json.dumps(pipeline_config), "executorId": deploy_options.executor_id, @@ -353,7 +352,6 @@ def deploy( mutation updateIngestionSource( $urn: String!, $name: String!, - $description: String, $type: String!, $schedule: UpdateIngestionSourceScheduleInput, $recipe: String!, @@ -362,7 +360,6 @@ def deploy( updateIngestionSource(urn: $urn, input: { name: $name, - description: $description, type: $type, schedule: $schedule, config: {