From b3d598e60b57df74786230324b11b484e376523d Mon Sep 17 00:00:00 2001
From: Sushant Divate
Date: Tue, 28 Jan 2020 17:06:46 -0800
Subject: [PATCH] added toggle for evaluation step (#152)

# These are the default values set in ml_service\util\env_variables.py. Uncomment and override if desired.
RUN_EVALUATION=true
ALLOW_RUN_CANCEL=true
---
 .env.example                                  |  8 +-
 .pipelines/diabetes_regression-variables.yml  | 89 ++++++++++---------
 README.md                                     |  2 +-
 .../evaluate/evaluate_model.py                | 16 +++-
 ...iabetes_regression_build_train_pipeline.py | 15 +++-
 ml_service/util/env_variables.py              | 11 +++
 6 files changed, 89 insertions(+), 52 deletions(-)

diff --git a/.env.example b/.env.example
index d85e4586..af11a5a3 100644
--- a/.env.example
+++ b/.env.example
@@ -39,4 +39,10 @@ DATAFILE_NAME = 'diabetes.csv'
 DB_CLUSTER_ID = ''
 
 # Optional. Container Image name for image creation
-IMAGE_NAME = 'mltrained'
\ No newline at end of file
+IMAGE_NAME = 'mltrained'
+
+# Run Evaluation Step in AML pipeline
+RUN_EVALUATION = 'true'
+
+# Set to true cancels the Azure ML pipeline run when evaluation criteria are not met.
+ALLOW_RUN_CANCEL = 'true'
diff --git a/.pipelines/diabetes_regression-variables.yml b/.pipelines/diabetes_regression-variables.yml
index 0186de32..5d7bc7ef 100644
--- a/.pipelines/diabetes_regression-variables.yml
+++ b/.pipelines/diabetes_regression-variables.yml
@@ -1,44 +1,49 @@
 variables:
 # Azure ML Workspace Variables
-- name: EXPERIMENT_NAME
-  value: mlopspython
-  # AML Compute Cluster Config
-- name: AML_COMPUTE_CLUSTER_CPU_SKU
-  value: STANDARD_DS2_V2
-- name: AML_COMPUTE_CLUSTER_NAME
-  value: train-cluster
-- name: AML_CLUSTER_MIN_NODES
-  value: 0
-- name: AML_CLUSTER_MAX_NODES
-  value: 4
-- name: AML_CLUSTER_PRIORITY
-  value: lowpriority
-  # Training Config
-- name: BUILD_TRAIN_SCRIPT
-  value: diabetes_regression_build_train_pipeline.py
-- name: TRAIN_SCRIPT_PATH
-  value: training/train.py
-- name: MODEL_NAME
-  value: sklearn_regression_model.pkl
-- name: MODEL_VERSION
-  value: '1'
-  # AML Pipeline Config
-- name: TRAINING_PIPELINE_NAME
-  value: 'diabetes-Training-Pipeline'
-- name: MODEL_PATH
-  value: ''
-- name: EVALUATE_SCRIPT_PATH
-  value: evaluate/evaluate_model.py
-- name: REGISTER_SCRIPT_PATH
-  value: register/register_model.py
-- name: SOURCES_DIR_TRAIN
-  value: diabetes_regression
-- name: IMAGE_NAME
-  value: 'diabetestrained'
-  # Optional. Used by a training pipeline with R on Databricks
-- name: DB_CLUSTER_ID
-  value: ''
-- name: SCORE_SCRIPT
-  value: score.py
-- name: DATASET_NAME
-  value: diabetes_ds
+  - name: EXPERIMENT_NAME
+    value: mlopspython
+  # AML Compute Cluster Config
+  - name: AML_COMPUTE_CLUSTER_CPU_SKU
+    value: STANDARD_DS2_V2
+  - name: AML_COMPUTE_CLUSTER_NAME
+    value: train-cluster
+  - name: AML_CLUSTER_MIN_NODES
+    value: 0
+  - name: AML_CLUSTER_MAX_NODES
+    value: 4
+  - name: AML_CLUSTER_PRIORITY
+    value: lowpriority
+  # Training Config
+  - name: BUILD_TRAIN_SCRIPT
+    value: diabetes_regression_build_train_pipeline.py
+  - name: TRAIN_SCRIPT_PATH
+    value: training/train.py
+  - name: MODEL_NAME
+    value: sklearn_regression_model.pkl
+  - name: MODEL_VERSION
+    value: "1"
+  # AML Pipeline Config
+  - name: TRAINING_PIPELINE_NAME
+    value: "diabetes-Training-Pipeline"
+  - name: MODEL_PATH
+    value: ""
+  - name: EVALUATE_SCRIPT_PATH
+    value: evaluate/evaluate_model.py
+  - name: REGISTER_SCRIPT_PATH
+    value: register/register_model.py
+  - name: SOURCES_DIR_TRAIN
+    value: diabetes_regression
+  - name: IMAGE_NAME
+    value: "diabetestrained"
+  # Optional. Used by a training pipeline with R on Databricks
+  - name: DB_CLUSTER_ID
+    value: ""
+  - name: SCORE_SCRIPT
+    value: score.py
+  - name: DATASET_NAME
+    value: diabetes_ds
+  # These are the default values set in ml_service\util\env_variables.py. Uncomment and override if desired.
+  # - name: RUN_EVALUATION
+  #   value: "true"
+  # - name: ALLOW_RUN_CANCEL
+  #   value: "true"
diff --git a/README.md b/README.md
index e622ba75..c0416cff 100644
--- a/README.md
+++ b/README.md
@@ -12,7 +12,7 @@ description: "Code which demonstrates how to set up and operationalize an MLOps
 
 # MLOps with Azure ML
 
-[![Build Status](https://aidemos.visualstudio.com/MLOps/_apis/build/status/microsoft.MLOpsPython-CI?branchName=master)](https://aidemos.visualstudio.com/MLOps/_build/latest?definitionId=127&branchName=master)
+[![Build Status](https://aidemos.visualstudio.com/MLOps/_apis/build/status/microsoft.MLOpsPython?branchName=master)](https://aidemos.visualstudio.com/MLOps/_build/latest?definitionId=151&branchName=master)
 
 MLOps will help you to understand how to build the Continuous Integration and Continuous Delivery pipeline for a ML/AI project. We will be using the Azure DevOps Project for build and release/deployment pipelines along with Azure ML services for model retraining pipeline, model management and operationalization.
diff --git a/diabetes_regression/evaluate/evaluate_model.py b/diabetes_regression/evaluate/evaluate_model.py
index 2218137f..cbac5bb9 100644
--- a/diabetes_regression/evaluate/evaluate_model.py
+++ b/diabetes_regression/evaluate/evaluate_model.py
@@ -83,6 +83,12 @@
     help="Name of the Model",
     default="sklearn_regression_model.pkl",
 )
+parser.add_argument(
+    "--allow_run_cancel",
+    type=str,
+    help="Set this to false to avoid evaluation step from cancelling run after an unsuccessful evaluation",  # NOQA: E501
+    default="true",
+)
 
 args = parser.parse_args()
 if (args.build_id is not None):
@@ -98,8 +104,8 @@
     if (builduri_base is not None):
         build_uri = builduri_base + build_id
         run.tag("BuildUri", value=build_uri)
-
-# Paramaterize the matrices on which the models should be compared
+allow_run_cancel = args.allow_run_cancel
+# Parameterize the matrices on which the models should be compared
 # Add golden data set on which all the model performance can be evaluated
 try:
     firstRegistration = False
@@ -124,7 +130,8 @@
         if (production_model_mse is None or new_model_mse is None):
             print("Unable to find", metric_eval, "metrics, "
                   "exiting evaluation")
-            run.parent.cancel()
+            if((allow_run_cancel).lower() == 'true'):
+                run.parent.cancel()
         else:
             print(
                 "Current Production model mse: {}, "
@@ -139,7 +146,8 @@
         else:
             print("New trained model metric is less than or equal to "
                   "production model so skipping model registration.")
-            run.parent.cancel()
+            if((allow_run_cancel).lower() == 'true'):
+                run.parent.cancel()
     else:
         print("This is the first model, "
               "thus it should be registered")
diff --git a/ml_service/pipelines/diabetes_regression_build_train_pipeline.py b/ml_service/pipelines/diabetes_regression_build_train_pipeline.py
index 0b86eb50..0e963d96 100644
--- a/ml_service/pipelines/diabetes_regression_build_train_pipeline.py
+++ b/ml_service/pipelines/diabetes_regression_build_train_pipeline.py
@@ -89,6 +89,7 @@ def main():
         arguments=[
             "--build_id", build_id_param,
             "--model_name", model_name_param,
+            "--allow_run_cancel", e.allow_run_cancel,
         ],
         runconfig=run_config,
         allow_reuse=False,
@@ -108,10 +109,16 @@ def main():
         allow_reuse=False,
     )
     print("Step Register created")
-
-    evaluate_step.run_after(train_step)
-    register_step.run_after(evaluate_step)
-    steps = [train_step, evaluate_step, register_step]
+    # Check run_evaluation flag to include or exclude evaluation step.
+    if ((e.run_evaluation).lower() == 'true'):
+        print("Include evaluation step before register step.")
+        evaluate_step.run_after(train_step)
+        register_step.run_after(evaluate_step)
+        steps = [train_step, evaluate_step, register_step]
+    else:
+        print("Exclude evaluation step and directly run register step.")
+        register_step.run_after(train_step)
+        steps = [train_step, register_step]
 
     train_pipeline = Pipeline(workspace=aml_workspace, steps=steps)
     train_pipeline._set_experiment_name
diff --git a/ml_service/util/env_variables.py b/ml_service/util/env_variables.py
index ed3be221..c3f30e72 100644
--- a/ml_service/util/env_variables.py
+++ b/ml_service/util/env_variables.py
@@ -44,6 +44,9 @@ def __init__(self):
         self._datastore_name = os.environ.get("DATASTORE_NAME")
         self._datafile_name = os.environ.get("DATAFILE_NAME")
         self._dataset_name = os.environ.get("DATASET_NAME")
+        self._run_evaluation = os.environ.get("RUN_EVALUATION", "true")
+        self._allow_run_cancel = os.environ.get(
+            "ALLOW_RUN_CANCEL", "true")
 
     @property
     def workspace_name(self):
@@ -160,3 +163,11 @@ def datafile_name(self):
     @property
     def dataset_name(self):
         return self._dataset_name
+
+    @property
+    def run_evaluation(self):
+        return self._run_evaluation
+
+    @property
+    def allow_run_cancel(self):
+        return self._allow_run_cancel
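
Both toggles travel through the code as strings ("true"/"false") and are compared case-insensitively, as the patch shows in evaluate_model.py and diabetes_regression_build_train_pipeline.py. A minimal standalone sketch of that pattern follows; the helper name flag_enabled and the inline environment value are illustrative assumptions, not part of the repository:

    import os

    def flag_enabled(name: str, default: str = "true") -> bool:
        # Environment variables arrive as strings, so normalize the value
        # before comparing, mirroring the .lower() == 'true' checks that
        # the patch adds for RUN_EVALUATION and ALLOW_RUN_CANCEL.
        return os.environ.get(name, default).lower() == "true"

    # Illustrative value only; in the repo this would come from .env or the
    # pipeline variables file.
    os.environ["RUN_EVALUATION"] = "false"

    if flag_enabled("RUN_EVALUATION"):
        print("Include evaluation step before register step.")
    else:
        print("Exclude evaluation step and directly run register step.")

Because both lookups fall back to a "true" default, leaving RUN_EVALUATION and ALLOW_RUN_CANCEL unset keeps the previous behaviour: the evaluation step runs and may cancel the pipeline run when the new model does not beat the production model.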