From 395f151704ed19a590eb06cc50444d0a9b74ff39 Mon Sep 17 00:00:00 2001
From: Miguel Peralvo
Date: Sun, 21 Jun 2020 10:04:53 +0100
Subject: [PATCH 01/14] Updating pipeline: data_ml_pipeline

---
 resources/adf/pipeline/data_ml_pipeline.json | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/resources/adf/pipeline/data_ml_pipeline.json b/resources/adf/pipeline/data_ml_pipeline.json
index 803c70f..18d9755 100644
--- a/resources/adf/pipeline/data_ml_pipeline.json
+++ b/resources/adf/pipeline/data_ml_pipeline.json
@@ -79,8 +79,10 @@
             "typeProperties": {
                 "pythonFile": "dbfs:/FileStore/Shared/db-automation/ML/batch_model.py",
                 "parameters": [
-                    "-m mlops-wine-model -o -r dbfs:/FileStore/Shared/db-automation -t test_table",
-                    "@pipeline().parameters.environment"
+                    "-m mlops-wine-model",
+                    "@pipeline().parameters.environment",
+                    "-r dbfs:/FileStore/Shared/db-automation",
+                    "-t wine_output_table_1"
                 ]
             },
             "linkedServiceName": {

From 54c174ab54e8e16b67147d40e94a78dcc8effe63 Mon Sep 17 00:00:00 2001
From: MiguelPeralvo
Date: Sun, 21 Jun 2020 10:22:12 +0100
Subject: [PATCH 02/14] Try to infer end to end.

---
 cicd-scripts/remote_registry_mlflow.py |  8 ++++--
 pipeline/ML/inference/batch_model.py   | 38 ++++++++++++++++++++++++++
 2 files changed, 43 insertions(+), 3 deletions(-)

diff --git a/cicd-scripts/remote_registry_mlflow.py b/cicd-scripts/remote_registry_mlflow.py
index 5320710..dc84aa9 100644
--- a/cicd-scripts/remote_registry_mlflow.py
+++ b/cicd-scripts/remote_registry_mlflow.py
@@ -11,6 +11,7 @@
 from mlflow.utils.string_utils import strip_prefix
 from mlflow.exceptions import MlflowException
 from mlflow.tracking import artifact_utils
+from mlflow.tracking import MlflowClient


 def _get_dbfs_endpoint(artifact_uri, artifact_path):
@@ -64,11 +65,13 @@ def main():
     parser = argparse.ArgumentParser(description="Execute python scripts in Databricks")
     parser.add_argument("-o", "--output_local_path", help="Output path where the artifacts will be written", required=True)
     parser.add_argument("-m", "--model_name", help="Model Registry Name", required=True)
+    parser.add_argument("-s", "--stage", help="Stage", default="staging", required=False)

     args = parser.parse_args()
     model_name = args.model_name
     output_local_path = args.output_local_path
+    stage = args.stage

     cli_profile_name = "registry"
     # TODO: Document that we assume that the registry profile will be created in the local machine:
@@ -78,11 +81,11 @@ def main():
     TRACKING_URI = f"databricks://{cli_profile_name}"
     print(f"TRACKING_URI: {TRACKING_URI}")
     artifact_path = 'model'
-    from mlflow.tracking import MlflowClient
+
     remote_client = MlflowClient(tracking_uri=TRACKING_URI)
     mlflow.set_tracking_uri(TRACKING_URI)
     # client = mlflow.tracking.MlflowClient()
-    latest_model = remote_client.get_latest_versions(name=model_name, stages=["staging"])
+    latest_model = remote_client.get_latest_versions(name=model_name, stages=[stage])
     print(f"Latest Model: {latest_model}")
     run_id = latest_model[0].run_id
     artifact_uri = artifact_utils.get_artifact_uri(run_id)
@@ -103,7 +106,6 @@ def main():

     # remote_client = MlflowClient(tracking_uri=TRACKING_URI)

-
 if __name__ == '__main__':
     main()

diff --git a/pipeline/ML/inference/batch_model.py b/pipeline/ML/inference/batch_model.py
index f1e22d8..1cd62be 100644
--- a/pipeline/ML/inference/batch_model.py
+++ b/pipeline/ML/inference/batch_model.py
@@ -3,6 +3,8 @@
 import requests
 import mlflow
 import mlflow.sklearn
+from mlflow.tracking import MlflowClient
+from mlflow.tracking import artifact_utils
 from mlflow import pyfunc
 import json
 from pyspark.sql.functions import col
@@ -62,6 +64,42 @@ def main():
     wine_df = wine_df.select(*(col(column).cast("float").alias(column.replace(" ", "_")) for column in wine_df.columns))
     data_spark = wine_df

+    # Pointing to the right model registry
+    host = dbutils.secrets.get(scope = "azure-demo-mlflow", key = "mlflow_host_registry")
+    token = dbutils.secrets.get(scope="azure-demo-mlflow", key="mlflow_token_registry")
+    cli_profile_name = 'registry'
+    dbutils.fs.put("file:///root/.databrickscfg", "[%s]\nhost=%s\ntoken=%s" % (cli_profile_name, host, token),
+                   overwrite=True)
+
+    TRACKING_URI = "databricks://%s" % cli_profile_name
+    print(TRACKING_URI)
+    remote_client = MlflowClient(tracking_uri=TRACKING_URI)
+    mlflow.set_tracking_uri(TRACKING_URI)
+    artifact_path = 'model'
+
+    latest_model = remote_client.get_latest_versions(name=model_name, stages=[stage])
+    print(f"Latest Model: {latest_model}")
+    run_id = latest_model[0].run_id
+    artifact_uri = artifact_utils.get_artifact_uri(run_id)
+    print(f"artifact_uri: {artifact_uri}")
+    model_uri = f"runs:/{latest_model[0].run_id}/{artifact_path}"
+    print(f"model_uri: {model_uri}")
+    udf = pyfunc.spark_udf(spark, model_uri)
+
+    # data_spark = spark.read.csv(dbfs_wine_data_path, header=True)
+    predictions = data_spark.select(udf(*data_spark.columns).alias('prediction'), "*")
+
+    spark.sql(f"CREATE DATABASE IF NOT EXISTS {db}")
+    spark.sql(f"DROP TABLE IF EXISTS {db}.{ml_output_predictions_table}")
+    predictions.write.format("delta").mode("overwrite").saveAsTable(f"{db}.{ml_output_predictions_table}")
+
+    output = json.dumps({
+        "model_name": model_name,
+        "model_uri": model_uri
+    })
+
+    print(output)
+

From 9941832c4e529e78355aac3ef06317d7e888646e Mon Sep 17 00:00:00 2001
From: Miguel Peralvo
Date: Sun, 21 Jun 2020 10:42:54 +0100
Subject: [PATCH 03/14] Updating pipeline: data_ml_pipeline

---
 resources/adf/pipeline/data_ml_pipeline.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/resources/adf/pipeline/data_ml_pipeline.json b/resources/adf/pipeline/data_ml_pipeline.json
index 18d9755..e267fc1 100644
--- a/resources/adf/pipeline/data_ml_pipeline.json
+++ b/resources/adf/pipeline/data_ml_pipeline.json
@@ -81,7 +81,7 @@
                 "parameters": [
                     "-m mlops-wine-model",
                     "@pipeline().parameters.environment",
-                    "-r dbfs:/FileStore/Shared/db-automation",
+                    "-r /FileStore/Shared/db-automation",
                     "-t wine_output_table_1"
                 ]
             },

From e7c1eb1ebfe49042e47e34cd303619c28d18c0c5 Mon Sep 17 00:00:00 2001
From: MiguelPeralvo
Date: Sun, 21 Jun 2020 10:50:46 +0100
Subject: [PATCH 04/14] Remove blank spaces.
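The batch_model.py change below strips stray whitespace from the stage
value. Our reading of the bug, for the record: ADF hands the
spark_python_task one argv token per entry in its "parameters" array, so
a value written as "-s test" reaches argparse as a single token and the
stage is parsed with a leading space. A minimal repro (a sketch; the argv
token is hypothetical, standing in for what ADF passes):

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("-s", "--stage")

    # One ADF parameter string arrives as ONE argv element:
    args = parser.parse_args(["-s staging"])
    print(repr(args.stage))                   # ' staging' -- leading space
    print(repr(args.stage.replace(" ", "")))  # 'staging'  -- this patch's fix

Patch 05 removes the root cause by switching to "--key=value" tokens,
which argparse splits on '=' with no whitespace left behind.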
---
 azure-pipelines.yml                  | 2 +-
 pipeline/ML/inference/batch_model.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 8059d24..6765338 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -146,7 +146,7 @@ stages:
             echo ""
             echo "-------------"
             dbfs cp --overwrite $(Build.Repository.LocalPath)/"pipeline/ML/batch_test/deploy_test_databricks_batch_ml_model.py" dbfs:/FileStore/Shared/db-automation/batch_test/deploy_test_databricks_batch_ml_model.py --profile AZDO
-            python $(Build.Repository.LocalPath)/cicd-scripts/execute_script.py --shard $(WORKSPACE_REGION_URL) --token $(DATABRICKS_TOKEN) --cluster $(EXISTING_CLUSTER_ID) --dbfspath dbfs:/FileStore/Shared/db-automation/batch_test/deploy_test_databricks_batch_ml_model.py --outfilepath /home/vsts/work/1/s/pipeline --params "\-m,$(MODEL_NAME),\-r,dbfs:/FileStore/Shared/db-automation,\-s,staging"
+            python $(Build.Repository.LocalPath)/cicd-scripts/execute_script.py --shard $(WORKSPACE_REGION_URL) --token $(DATABRICKS_TOKEN) --cluster $(EXISTING_CLUSTER_ID) --dbfspath dbfs:/FileStore/Shared/db-automation/batch_test/deploy_test_databricks_batch_ml_model.py --outfilepath /home/vsts/work/1/s/pipeline --params "\-m,$(MODEL_NAME),\-r,/FileStore/Shared/db-automation,\-s,staging"
           else
             echo "We're not testing the model in this event"
           fi

diff --git a/pipeline/ML/inference/batch_model.py b/pipeline/ML/inference/batch_model.py
index 1cd62be..5f339b5 100644
--- a/pipeline/ML/inference/batch_model.py
+++ b/pipeline/ML/inference/batch_model.py
@@ -46,7 +46,7 @@ def main():
     args = parser.parse_args()
     model_name = args.model_name
     home = args.root_path
-    stage = args.stage
+    stage = args.stage.replace(" ", "")
     db = args.db_name.replace("@", "_").replace(".", "_")
     ml_output_predictions_table = args.table_name

From 34205b9ab53ac4a692ca810f99a247c0569f73b1 Mon Sep 17 00:00:00 2001
From: Miguel Peralvo
Date: Sun, 21 Jun 2020 11:02:50 +0100
Subject: [PATCH 05/14] Updating pipeline: data_ml_pipeline

---
 resources/adf/pipeline/data_ml_pipeline.json | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/resources/adf/pipeline/data_ml_pipeline.json b/resources/adf/pipeline/data_ml_pipeline.json
index e267fc1..d9d6771 100644
--- a/resources/adf/pipeline/data_ml_pipeline.json
+++ b/resources/adf/pipeline/data_ml_pipeline.json
@@ -79,10 +79,10 @@
             "typeProperties": {
                 "pythonFile": "dbfs:/FileStore/Shared/db-automation/ML/batch_model.py",
                 "parameters": [
-                    "-m mlops-wine-model",
+                    "--model_name=mlops-wine-model",
                     "@pipeline().parameters.environment",
-                    "-r /FileStore/Shared/db-automation",
-                    "-t wine_output_table_1"
+                    "--root_path=dbfs:/FileStore/Shared/db-automation",
+                    "--table_name=wine_output_table_1"
                 ]
             },
             "linkedServiceName": {
@@ -94,7 +94,7 @@
     "parameters": {
         "environment": {
             "type": "string",
-            "defaultValue": "-s test"
+            "defaultValue": "--stage=test"
         }
     },
     "annotations": []

From 25b7966ea4f498ec0411868bf52ed41688175ab5 Mon Sep 17 00:00:00 2001
From: MiguelPeralvo
Date: Sun, 21 Jun 2020 11:05:40 +0100
Subject: [PATCH 06/14] Restore prefix.
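The -r value goes back to the dbfs:/ form here. Worth spelling out, since
patches 07-09 keep toggling between the two spellings: the same DBFS
location has two addresses on a Databricks cluster, and which one works
depends on the consumer. A sketch of the distinction, assuming a
Databricks runtime where `spark` and the FUSE mount are available (the
CSV name is hypothetical):

    # "dbfs:/..." is the DBFS URI scheme: Spark readers and the dbfs CLI take it.
    df = spark.read.format("csv").option("header", "true") \
        .load("dbfs:/FileStore/Shared/db-automation/wine.csv")

    # "/dbfs/..." is the local FUSE mount of the same store: plain Python file
    # I/O (and mlflow.pyfunc loading a model from a local directory) needs it.
    with open("/dbfs/FileStore/Shared/db-automation/wine.csv") as f:
        header = f.readline()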
---
 azure-pipelines.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 6765338..8059d24 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -146,7 +146,7 @@ stages:
             echo ""
             echo "-------------"
             dbfs cp --overwrite $(Build.Repository.LocalPath)/"pipeline/ML/batch_test/deploy_test_databricks_batch_ml_model.py" dbfs:/FileStore/Shared/db-automation/batch_test/deploy_test_databricks_batch_ml_model.py --profile AZDO
-            python $(Build.Repository.LocalPath)/cicd-scripts/execute_script.py --shard $(WORKSPACE_REGION_URL) --token $(DATABRICKS_TOKEN) --cluster $(EXISTING_CLUSTER_ID) --dbfspath dbfs:/FileStore/Shared/db-automation/batch_test/deploy_test_databricks_batch_ml_model.py --outfilepath /home/vsts/work/1/s/pipeline --params "\-m,$(MODEL_NAME),\-r,/FileStore/Shared/db-automation,\-s,staging"
+            python $(Build.Repository.LocalPath)/cicd-scripts/execute_script.py --shard $(WORKSPACE_REGION_URL) --token $(DATABRICKS_TOKEN) --cluster $(EXISTING_CLUSTER_ID) --dbfspath dbfs:/FileStore/Shared/db-automation/batch_test/deploy_test_databricks_batch_ml_model.py --outfilepath /home/vsts/work/1/s/pipeline --params "\-m,$(MODEL_NAME),\-r,dbfs:/FileStore/Shared/db-automation,\-s,staging"
           else
             echo "We're not testing the model in this event"
           fi

From 871eacc4993af799adaae198ba64aa026ac3e037 Mon Sep 17 00:00:00 2001
From: MiguelPeralvo
Date: Sun, 21 Jun 2020 11:36:49 +0100
Subject: [PATCH 07/14] Adding model_path

---
 azure-pipelines.yml | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 8059d24..5e1aeae 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -245,22 +245,17 @@ stages:
       displayName: 'Import ML Batch Inference script'

     - script: |
-        id
         echo $HOME
         echo "[registry]" > $HOME/.databrickscfg
         echo "host=$(WORKSPACE_REGION_URL)" >> $HOME/.databrickscfg
         echo "token=$(DATABRICKS_TOKEN)" >> $HOME/.databrickscfg
         cat /home/vsts/.databrickscfg
         mkdir -p /home/vsts/mlflow/$(MODEL_NAME)/artifacts
-      displayName: 'Configure mlflow client connection to mlflow global (per org) model registry'
-
-    - task: PythonScript@0
-      inputs:
-        scriptSource: 'filePath'
-        scriptPath: '$(Build.Repository.LocalPath)/cicd-scripts/remote_registry_mlflow.py'
-        arguments: '--output_local_path=/home/vsts/mlflow/$(MODEL_NAME)/artifacts --model $(MODEL_NAME)'
+        python $(Build.Repository.LocalPath)/cicd-scripts/remote_registry_mlflow.py --output_local_path=/home/vsts/mlflow/$(MODEL_NAME)/artifacts --model $(MODEL_NAME)
+        dbfs cp --recursive --overwrite /home/vsts/mlflow/$(MODEL_NAME)/artifacts/ dbfs:/FileStore/Shared/db-automation/mlflow/$(MODEL_NAME)/model/ --profile AZDO
       displayName: 'Retrieve artifacts from the mlflow global (per org) model registry to use them in Databricks Staging'
+
     - task: liprec.vsts-publish-adf.deploy-adf-json.deploy-adf-json@2
       displayName: 'Deploy Data Pipeline to $(STAGING_ADF_NAME) ADF'
       inputs:
@@ -276,7 +271,7 @@
           scriptType: bash
           scriptLocation: inlineScript
           inlineScript: |
+            python $(Build.Repository.LocalPath)/cicd-scripts/adf_pipeline_run.py -r $(RESOURCE_GROUP) -a $(STAGING_ADF_NAME) -p $(STAGING_ADF_PIPELINE_NAME) -o ./logs/json -pa "{\"environment\":\"--stage=staging\", \"model_path\":\"--model_path=dbfs:/FileStore/Shared/db-automation/mlflow/$(MODEL_NAME)\"}"
           useGlobalConfig: true
         timeoutInMinutes: 10

From cde2ca253642a05b0901524cea2abe8c5d08afc3 Mon Sep 17 00:00:00 2001
From: Miguel Peralvo
Date: Sun, 21 Jun 2020 11:40:01 +0100
Subject: [PATCH 08/14] Updating pipeline: data_ml_pipeline

---
 resources/adf/pipeline/data_ml_pipeline.json | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/resources/adf/pipeline/data_ml_pipeline.json b/resources/adf/pipeline/data_ml_pipeline.json
index d9d6771..bad13d1 100644
--- a/resources/adf/pipeline/data_ml_pipeline.json
+++ b/resources/adf/pipeline/data_ml_pipeline.json
@@ -82,7 +82,8 @@
                     "--model_name=mlops-wine-model",
                     "@pipeline().parameters.environment",
                     "--root_path=dbfs:/FileStore/Shared/db-automation",
-                    "--table_name=wine_output_table_1"
+                    "--table_name=wine_output_table_1",
+                    "@pipeline().parameters.model_path"
                 ]
             },
             "linkedServiceName": {
@@ -95,6 +96,10 @@
         "environment": {
            "type": "string",
            "defaultValue": "--stage=test"
+        },
+        "model_path": {
+            "type": "string",
+            "defaultValue": "--model_path=/dbfs/FileStore/Shared/db-automation/mlflow/mlops-wine-model"
         }
     },
     "annotations": []

From a8ea95bc2353328fe9ebf8cea58989e7e319b496 Mon Sep 17 00:00:00 2001
From: MiguelPeralvo
Date: Sun, 21 Jun 2020 11:49:35 +0100
Subject: [PATCH 09/14] Adding model_path parameter

---
 azure-pipelines.yml                  |  2 +-
 pipeline/ML/inference/batch_model.py | 31 +++++++---------------------
 2 files changed, 9 insertions(+), 24 deletions(-)

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 5e1aeae..0d4bbe6 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -271,7 +271,7 @@
           scriptType: bash
           scriptLocation: inlineScript
           inlineScript: |
-            python $(Build.Repository.LocalPath)/cicd-scripts/adf_pipeline_run.py -r $(RESOURCE_GROUP) -a $(STAGING_ADF_NAME) -p $(STAGING_ADF_PIPELINE_NAME) -o ./logs/json -pa "{\"environment\":\"--stage=staging\", \"model_path\":\"--model_path=dbfs:/FileStore/Shared/db-automation/mlflow/$(MODEL_NAME)\"}"
+            python $(Build.Repository.LocalPath)/cicd-scripts/adf_pipeline_run.py -r $(RESOURCE_GROUP) -a $(STAGING_ADF_NAME) -p $(STAGING_ADF_PIPELINE_NAME) -o ./logs/json -pa "{\"environment\":\"--stage=staging\", \"model_path\":\"--model_path=/dbfs/FileStore/Shared/db-automation/mlflow/$(MODEL_NAME)\"}"
           useGlobalConfig: true
         timeoutInMinutes: 10

diff --git a/pipeline/ML/inference/batch_model.py b/pipeline/ML/inference/batch_model.py
index 5f339b5..34b747d 100644
--- a/pipeline/ML/inference/batch_model.py
+++ b/pipeline/ML/inference/batch_model.py
@@ -41,7 +41,9 @@ def main():
     parser.add_argument(
         "-t", "--table_name", help="Output Table name",
         default="mlops_wine_quality_regression", required=False)
-    # parser.add_argument("-p", "--phase", help="Phase", default="qa", required=True)
+    parser.add_argument(
+        "-p", "--model_path", help="Model's artifacts path",
+        default="/dbfs/FileStore/Shared/db-automation/mlflow/wine-model", required=True)

     args = parser.parse_args()
     model_name = args.model_name
     home = args.root_path
     stage = args.stage.replace(" ", "")
     db = args.db_name.replace("@", "_").replace(".", "_")
     ml_output_predictions_table = args.table_name
+    model_path = args.model_path

     print(f"Model name: {model_name}")
     print(f"home: {home}")
     print(f"stage: {stage}")
     print(f"db: {db}")
     print(f"ml_output_predictions_table: {ml_output_predictions_table}")
+    print(f"model_path: {model_path}")

     print("batch_inference")
     temp_data_path = f"/dbfs/tmp/mlflow-wine-quality.csv"
@@ -63,26 +67,10 @@ def main():
     wine_df = spark.read.format("csv").option("header", "true").load(dbfs_wine_data_path).drop("quality").cache()
     wine_df = wine_df.select(*(col(column).cast("float").alias(column.replace(" ", "_")) for column in wine_df.columns))
     data_spark = wine_df
-
-    # Pointing to the right model registry
-    host = dbutils.secrets.get(scope = "azure-demo-mlflow", key = "mlflow_host_registry")
-    token = dbutils.secrets.get(scope="azure-demo-mlflow", key="mlflow_token_registry")
-    cli_profile_name = 'registry'
-    dbutils.fs.put("file:///root/.databrickscfg", "[%s]\nhost=%s\ntoken=%s" % (cli_profile_name, host, token),
-                   overwrite=True)
-
-    TRACKING_URI = "databricks://%s" % cli_profile_name
-    print(TRACKING_URI)
-    remote_client = MlflowClient(tracking_uri=TRACKING_URI)
-    mlflow.set_tracking_uri(TRACKING_URI)
-    artifact_path = 'model'
-
-    latest_model = remote_client.get_latest_versions(name=model_name, stages=[stage])
-    print(f"Latest Model: {latest_model}")
-    run_id = latest_model[0].run_id
-    artifact_uri = artifact_utils.get_artifact_uri(run_id)
+    model_artifact = 'model'
+    artifact_uri = model_path
     print(f"artifact_uri: {artifact_uri}")
-    model_uri = f"runs:/{latest_model[0].run_id}/{artifact_path}"
+    model_uri = f"{artifact_uri}/{model_artifact}"
     print(f"model_uri: {model_uri}")
     udf = pyfunc.spark_udf(spark, model_uri)

@@ -92,7 +80,6 @@ def main():
     # data_spark = spark.read.csv(dbfs_wine_data_path, header=True)
     predictions = data_spark.select(udf(*data_spark.columns).alias('prediction'), "*")

     spark.sql(f"CREATE DATABASE IF NOT EXISTS {db}")
     spark.sql(f"DROP TABLE IF EXISTS {db}.{ml_output_predictions_table}")
     predictions.write.format("delta").mode("overwrite").saveAsTable(f"{db}.{ml_output_predictions_table}")
-
     output = json.dumps({
         "model_name": model_name,
         "model_uri": model_uri
     })
@@ -101,8 +88,6 @@ def main():

     print(output)

-
-
 if __name__ == '__main__':
     main()
     # sys.exit(0)

From c8041c37e9969f547494b2fb088a0b2e350830b2 Mon Sep 17 00:00:00 2001
From: MiguelPeralvo
Date: Sun, 21 Jun 2020 12:00:43 +0100
Subject: [PATCH 10/14] Don't overwrite the AZDO profile

---
 azure-pipelines.yml | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 0d4bbe6..51d0b64 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -186,15 +186,15 @@ stages:
     - script: env | sort
       displayName: 'Environment / Context'

-#    - task: DownloadBuildArtifacts@0
-#      displayName: 'Download Build Artifacts'
-#      inputs:
-#        buildType: specific
-#        project: '$(System.TeamProjectId)'
-#        pipeline: 1
-#        buildVersionToDownload: latestFromBranch
-#        branchName: '$(System.PullRequest.SourceBranch)'
-#        artifactName: drop
+    - task: DownloadBuildArtifacts@0
+      displayName: 'Download Build Artifacts'
+      inputs:
+        buildType: specific
+        project: '$(System.TeamProjectId)'
+        pipeline: 1
+        buildVersionToDownload: latestFromBranch
+        branchName: '$(System.PullRequest.SourceBranch)'
+        artifactName: drop

     - task: UsePythonVersion@0
       displayName: 'Use Python 3.7'

     - script: |
         echo $HOME
-        echo "[registry]" > $HOME/.databrickscfg
+        ls $(Build.Repository.LocalPath)/libraries/python/dbxdemo/
+        echo >> $HOME/.databrickscfg
+        echo "[registry]" >> $HOME/.databrickscfg
         echo "host=$(WORKSPACE_REGION_URL)" >> $HOME/.databrickscfg
         echo "token=$(DATABRICKS_TOKEN)" >> $HOME/.databrickscfg
         cat /home/vsts/.databrickscfg
         mkdir -p /home/vsts/mlflow/$(MODEL_NAME)/artifacts

From a2b1141ef7511c857b1462edf7dc40e9a5e0daa6 Mon Sep 17 00:00:00 2001
From: MiguelPeralvo
Date: Sun, 21 Jun 2020 12:07:39 +0100
Subject: [PATCH 11/14] Don't download the artifacts for the time being.
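Patches 11 and 12 re-comment the DownloadBuildArtifacts task that patch
10 re-enabled in passing; the substance of patch 10 is the redirection
fix. '>' truncated ~/.databrickscfg and wiped the AZDO profile that the
dbfs cp steps rely on, while '>>' appends the [registry] profile
alongside it. A sketch of the file both profiles share, assuming the
databricks-cli INI layout (hosts and tokens hypothetical):

    import configparser

    # Hypothetical contents of ~/.databrickscfg after the append ('>>'):
    profiles = (
        "[AZDO]\n"
        "host = https://staging-workspace.azuredatabricks.net\n"
        "token = dapi-azdo-token\n"
        "\n"
        "[registry]\n"
        "host = https://registry-workspace.azuredatabricks.net\n"
        "token = dapi-registry-token\n"
    )

    cfg = configparser.ConfigParser()
    cfg.read_string(profiles)
    print(cfg.sections())  # ['AZDO', 'registry'] -- both CLI profiles survive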
---
 azure-pipelines.yml | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 51d0b64..2b05680 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -187,14 +187,14 @@ stages:
       displayName: 'Environment / Context'

     - task: DownloadBuildArtifacts@0
-      displayName: 'Download Build Artifacts'
-      inputs:
-        buildType: specific
-        project: '$(System.TeamProjectId)'
-        pipeline: 1
-        buildVersionToDownload: latestFromBranch
-        branchName: '$(System.PullRequest.SourceBranch)'
-        artifactName: drop
+#      displayName: 'Download Build Artifacts'
+#      inputs:
+#        buildType: specific
+#        project: '$(System.TeamProjectId)'
+#        pipeline: 1
+#        buildVersionToDownload: latestFromBranch
+#        branchName: '$(System.PullRequest.SourceBranch)'
+#        artifactName: drop

     - task: UsePythonVersion@0
       displayName: 'Use Python 3.7'

From 5eabcffed349eebc123910367d4bfe23c2dde99b Mon Sep 17 00:00:00 2001
From: MiguelPeralvo
Date: Sun, 21 Jun 2020 12:12:03 +0100
Subject: [PATCH 12/14] Missing line.

---
 azure-pipelines.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 2b05680..3bb5be4 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -186,7 +186,7 @@ stages:
     - script: env | sort
       displayName: 'Environment / Context'

-    - task: DownloadBuildArtifacts@0
+#    - task: DownloadBuildArtifacts@0
 #      displayName: 'Download Build Artifacts'
 #      inputs:
 #        buildType: specific

From 0a99250b5518de62bf89fbd8b6f2c6170b4bd983 Mon Sep 17 00:00:00 2001
From: MiguelPeralvo
Date: Sun, 21 Jun 2020 12:35:34 +0100
Subject: [PATCH 13/14] Extract more information.

---
 azure-pipelines.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 3bb5be4..175bf8e 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -254,6 +254,7 @@
         cat /home/vsts/.databrickscfg
         mkdir -p /home/vsts/mlflow/$(MODEL_NAME)/artifacts
         python $(Build.Repository.LocalPath)/cicd-scripts/remote_registry_mlflow.py --output_local_path=/home/vsts/mlflow/$(MODEL_NAME)/artifacts --model $(MODEL_NAME)
+        ls -latr /home/vsts/mlflow/$(MODEL_NAME)/artifacts
         dbfs cp --recursive --overwrite /home/vsts/mlflow/$(MODEL_NAME)/artifacts/ dbfs:/FileStore/Shared/db-automation/mlflow/$(MODEL_NAME)/model/ --profile AZDO
       displayName: 'Retrieve artifacts from the mlflow global (per org) model registry to use them in Databricks Staging'

From 4d5474381e4a24706ac107cd1556557f9b7e5b92 Mon Sep 17 00:00:00 2001
From: MiguelPeralvo
Date: Sun, 21 Jun 2020 12:38:32 +0100
Subject: [PATCH 14/14] Deploy in production.
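Production now mirrors the staging flow: append the [registry] profile,
pull the latest model artifacts from the per-org registry with
remote_registry_mlflow.py, and dbfs cp them into the workspace. On the
cluster, batch_model.py then loads the copied model off the FUSE path,
per patch 09. A sketch of that consumer side, assuming the layout the cp
produces, with the MLflow 'model' directory under the copied root (paths
follow the series' defaults):

    from mlflow import pyfunc

    # /dbfs/... is the FUSE view of the dbfs:/... target of the cp step.
    model_path = "/dbfs/FileStore/Shared/db-automation/mlflow/mlops-wine-model"
    model_uri = f"{model_path}/model"

    # `spark` and `data_spark` as prepared in batch_model.py's main():
    udf = pyfunc.spark_udf(spark, model_uri)
    predictions = data_spark.select(udf(*data_spark.columns).alias("prediction"), "*")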
---
 azure-pipelines.yml | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 175bf8e..5069dfe 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -346,20 +346,17 @@
       displayName: 'Import ML Batch Inference script'

     - script: |
-        id
         echo $HOME
-        echo "[registry]" > $HOME/.databrickscfg
+        ls $(Build.Repository.LocalPath)/libraries/python/dbxdemo/
+        echo >> $HOME/.databrickscfg
+        echo "[registry]" >> $HOME/.databrickscfg
         echo "host=$(WORKSPACE_REGION_URL)" >> $HOME/.databrickscfg
         echo "token=$(DATABRICKS_TOKEN)" >> $HOME/.databrickscfg
         cat /home/vsts/.databrickscfg
         mkdir -p /home/vsts/mlflow/$(MODEL_NAME)/artifacts
-      displayName: 'Configure mlflow client connection to global mlflow global (per org) model registry'
-
-    - task: PythonScript@0
-      inputs:
-        scriptSource: 'filePath'
-        scriptPath: '$(Build.Repository.LocalPath)/cicd-scripts/remote_registry_mlflow.py'
-        arguments: '--output_local_path=/home/vsts/mlflow/$(MODEL_NAME)/artifacts --model $(MODEL_NAME)'
+        python $(Build.Repository.LocalPath)/cicd-scripts/remote_registry_mlflow.py --output_local_path=/home/vsts/mlflow/$(MODEL_NAME)/artifacts --model $(MODEL_NAME)
+        ls -latr /home/vsts/mlflow/$(MODEL_NAME)/artifacts
+        dbfs cp --recursive --overwrite /home/vsts/mlflow/$(MODEL_NAME)/artifacts/ dbfs:/FileStore/Shared/db-automation/mlflow/$(MODEL_NAME)/model/ --profile AZDO
       displayName: 'Retrieve artifacts from the mlflow global (per org) model registry to use them in Databricks Production'

     - task: liprec.vsts-publish-adf.deploy-adf-json.deploy-adf-json@2