From b0da6451c1aa51a8754e16a73e2f657201427fbb Mon Sep 17 00:00:00 2001 From: jotaylo Date: Tue, 25 Feb 2020 09:15:50 -0800 Subject: [PATCH] Enable deploy of model registered by previous build (#207) Allows you to skip the first two stages of the pipeline and deploy a model registered by a previous build by setting the variable MODEL_BUILD_ID to the previous build's id when queuing the pipeline. This speeds up the process of fixing issues with the scoring or deployment configuration. --- .pipelines/diabetes_regression-ci-build-train.yml | 6 ++++-- ...betes_regression-template-get-model-version.yml | 2 +- .pipelines/diabetes_regression-variables.yml | 5 +++++ README.md | 2 +- docs/getting_started.md | 14 +++++++------- 5 files changed, 18 insertions(+), 11 deletions(-) diff --git a/.pipelines/diabetes_regression-ci-build-train.yml b/.pipelines/diabetes_regression-ci-build-train.yml index f60d11db..9c653978 100644 --- a/.pipelines/diabetes_regression-ci-build-train.yml +++ b/.pipelines/diabetes_regression-ci-build-train.yml @@ -21,6 +21,7 @@ pool: stages: - stage: 'Model_CI' displayName: 'Model CI' + condition: not(variables['MODEL_BUILD_ID']) jobs: - job: "Model_CI_Pipeline" displayName: "Model CI Pipeline" @@ -42,6 +43,7 @@ stages: - stage: 'Trigger_AML_Pipeline' displayName: 'Train model' + condition: and(succeeded(), not(variables['MODEL_BUILD_ID'])) jobs: - job: "Get_Pipeline_ID" condition: and(succeeded(), eq(coalesce(variables['auto-trigger-training'], 'true'), 'true')) @@ -90,7 +92,7 @@ stages: - stage: 'Deploy_ACI' displayName: 'Deploy to ACI' dependsOn: Trigger_AML_Pipeline - condition: and(succeeded(), variables['ACI_DEPLOYMENT_NAME']) + condition: and(or(succeeded(), variables['MODEL_BUILD_ID']), variables['ACI_DEPLOYMENT_NAME']) jobs: - job: "Deploy_ACI" displayName: "Deploy to ACI" @@ -157,7 +159,7 @@ stages: - stage: 'Deploy_Webapp' displayName: 'Deploy to Webapp' dependsOn: Trigger_AML_Pipeline - condition: and(succeeded(), variables['WEBAPP_DEPLOYMENT_NAME']) + 
condition: and(or(succeeded(), variables['MODEL_BUILD_ID']), variables['WEBAPP_DEPLOYMENT_NAME']) jobs: - job: "Deploy_Webapp" displayName: "Deploy to Webapp" diff --git a/.pipelines/diabetes_regression-template-get-model-version.yml b/.pipelines/diabetes_regression-template-get-model-version.yml index 90f684f7..870985a6 100644 --- a/.pipelines/diabetes_regression-template-get-model-version.yml +++ b/.pipelines/diabetes_regression-template-get-model-version.yml @@ -7,7 +7,7 @@ steps: inlineScript: | set -e # fail on error export SUBSCRIPTION_ID=$(az account show --query id -o tsv) - python -m ml_service.pipelines.diabetes_regression_verify_train_pipeline --build_id $(Build.BuildId) --output_model_version_file "model_version.txt" + python -m ml_service.pipelines.diabetes_regression_verify_train_pipeline --build_id $(modelbuildid) --output_model_version_file "model_version.txt" # Output model version to Azure DevOps job MODEL_VERSION="$(cat model_version.txt)" echo "##vso[task.setvariable variable=MODEL_VERSION]$MODEL_VERSION" diff --git a/.pipelines/diabetes_regression-variables.yml b/.pipelines/diabetes_regression-variables.yml index 4423cc60..ef27bf5a 100644 --- a/.pipelines/diabetes_regression-variables.yml +++ b/.pipelines/diabetes_regression-variables.yml @@ -51,3 +51,8 @@ variables: # value: "true" # - name: ALLOW_RUN_CANCEL # value: "true" + + # For debugging deployment issues. Specify a build id with the MODEL_BUILD_ID pipeline variable at queue time + # to skip training and deploy a model registered by a previous build. 
+ - name: modelbuildid + value: $[coalesce(variables['MODEL_BUILD_ID'], variables['Build.BuildId'])] \ No newline at end of file diff --git a/README.md b/README.md index 324f3249..77d9024f 100644 --- a/README.md +++ b/README.md @@ -66,7 +66,7 @@ You can find the details of the code and scripts in the repository [here](/docs/ - [Azure ML Python SDK Quickstart](https://docs.microsoft.com/en-us/azure/machine-learning/service/quickstart-create-workspace-with-python) - [Azure DevOps](https://docs.microsoft.com/en-us/azure/devops/?view=vsts) -# Contributing +## Contributing This project welcomes contributions and suggestions. Most contributions require you to agree to a Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us diff --git a/docs/getting_started.md b/docs/getting_started.md index a93b9fa5..936b01c3 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -17,7 +17,6 @@ This repository contains a template and demonstrates how to apply it to a sample If the desire is to adopt this template for your project and to use it with your machine learning code, it is recommended to go through this guide as it is first. This ensures everything is working on your environment. After the sample is working, follow the [bootstrap instructions](../bootstrap/README.md) to convert the ***diabetes_regression*** sample into your project starting point. - ## Create a Variable Group for your Pipeline We make use of a variable group inside Azure DevOps to store variables and their @@ -173,7 +172,6 @@ and check out the published training pipeline in the **mlops-AML-WS** workspace Great, you now have the build pipeline set up which automatically triggers every time there's a change in the master branch. - * The first stage of the pipeline, **Model CI**, performs linting, unit testing, build and publishes an **ML Training Pipeline** in an **ML Workspace**. 
**Note:** The build pipeline also supports building and publishing ML @@ -187,7 +185,7 @@ with R on Azure ML Compute. You will also need to uncomment (i.e. include) the to train a model with R on Databricks. You will need to manually create a Databricks cluster and attach it to the ML Workspace as a compute (Values DB_CLUSTER_ID and DATABRICKS_COMPUTE_NAME variables should be -specified). Example ML pipelines using R have a single step to train a model. They don't demonstrate how to evaluate and register a model. The evaluation and registering techniques are shown only in the Python implementation. +specified). Example ML pipelines using R have a single step to train a model. They don't demonstrate how to evaluate and register a model. The evaluation and registering techniques are shown only in the Python implementation. * The second stage of the pipeline, **Train model**, triggers the run of the ML Training Pipeline. The training pipeline will train, evaluate, and register a new model. The actual computation is performed in an [Azure Machine Learning Compute cluster](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-set-up-training-targets#amlcompute). In Azure DevOps, this stage runs an agentless job that waits for the completion of the Azure ML job, allowing the pipeline to wait for training completion for hours or even days without using agent resources. @@ -195,7 +193,7 @@ specified). Example ML pipelines using R have a single step to train a model. Th * The third stage of the pipeline, **Deploy to ACI**, deploys the model to the QA environment in [Azure Container Instances](https://azure.microsoft.com/en-us/services/container-instances/). It then runs a *smoke test* to validate the deployment, i.e. sends a sample query to the scoring web service and verifies that it returns a response in the expected format. -The pipeline uses a Docker container on the Azure Pipelines agents to accomplish the pipeline steps. 
The image of the container ***mcr.microsoft.com/mlops/python:latest*** is built with this [Dockerfile](./environment_setup/Dockerfile) and it has all necessary dependencies installed for the purposes of this repository. This image serves as an example of using a custom Docker image that provides a pre-baked environment. This environment is guaranteed to be the same on any building agent, VM or local machine. In your project you will want to build your own Docker image that only contains the dependencies and tools required for your use case. This image will be more likely smaller and therefore faster, and it will be totally maintained by your team. +The pipeline uses a Docker container on the Azure Pipelines agents to accomplish the pipeline steps. The image of the container ***mcr.microsoft.com/mlops/python:latest*** is built with this [Dockerfile](./environment_setup/Dockerfile) and it has all necessary dependencies installed for the purposes of this repository. This image serves as an example of using a custom Docker image that provides a pre-baked environment. This environment is guaranteed to be the same on any building agent, VM or local machine. In your project you will want to build your own Docker image that only contains the dependencies and tools required for your use case. This image will be more likely smaller and therefore faster, and it will be totally maintained by your team. Wait until the pipeline finishes and verify that there is a new model in the **ML Workspace**: @@ -203,6 +201,8 @@ Wait until the pipeline finishes and verify that there is a new model in the **M To disable the automatic trigger of the training pipeline, change the `auto-trigger-training` variable as listed in the `.pipelines\diabetes_regression-ci-build-train.yml` pipeline to `false`. This can also be overridden at runtime execution of the pipeline. 
+To skip model training and registration, and deploy a model successfully registered by a previous build (for testing changes to the score file or inference configuration), add the variable `MODEL_BUILD_ID` when the pipeline is queued, and set the value to the id of the previous build. + ### Deploy the Model to Azure Kubernetes Service The final stage is to deploy the model to the production environment running on @@ -238,7 +238,7 @@ In the Variables tab, edit your variable group (`devopsforai-aml-vg`). In the va | ---------------------- | ---------------------- | | WEBAPP_DEPLOYMENT_NAME | _name of your web app_ | -Set **WEBAPP_DEPLOYMENT_NAME** to the name of your Azure Web App. This app must exist before you can deploy the model to it. +Set **WEBAPP_DEPLOYMENT_NAME** to the name of your Azure Web App. This app must exist before you can deploy the model to it. Delete the **ACI_DEPLOYMENT_NAME** variable. @@ -254,10 +254,10 @@ Make sure your webapp has the credentials to pull the image from the Azure Conta ![build](./images/multi-stage-webapp.png) -# Next steps +## Next steps * You may wish to follow the [bootstrap instructions](../bootstrap/README.md) to create a starting point for your project use case. -* Use the [Convert ML experimental code to production code](https://docs.microsoft.com/azure/machine-learning/tutorial-convert-ml-experiment-to-production#use-your-own-model-with-mlopspython-code-template) tutorial which explains how to bring your machine learning code on top of this template. +* Use the [Convert ML experimental code to production code](https://docs.microsoft.com/azure/machine-learning/tutorial-convert-ml-experiment-to-production#use-your-own-model-with-mlopspython-code-template) tutorial which explains how to bring your machine learning code on top of this template. * The provided pipeline definition YAML file is a sample starting point, which you should tailor to your processes and environment. 
* You should edit the pipeline definition to remove unused stages. For example, if you are deploying to ACI and AKS, you should delete the unused `Deploy_Webapp` stage. * You may wish to enable [manual approvals](https://docs.microsoft.com/en-us/azure/devops/pipelines/process/approvals) before the deployment stages.