From a9834665f48a435ee53585949078724dcfcc8dd0 Mon Sep 17 00:00:00 2001 From: Sergii Baidachnyi Date: Fri, 31 Jan 2020 12:19:09 -0800 Subject: [PATCH 1/9] remove alpha from pipeline parameters --- .../diabetes_regression-ci-build-train.yml | 12 +--------- diabetes_regression/training/train.json | 3 +++ diabetes_regression/training/train.py | 23 +++++++++++-------- ...iabetes_regression_build_train_pipeline.py | 3 --- 4 files changed, 18 insertions(+), 23 deletions(-) create mode 100644 diabetes_regression/training/train.json diff --git a/.pipelines/diabetes_regression-ci-build-train.yml b/.pipelines/diabetes_regression-ci-build-train.yml index b89eb30c..9d9ed3b1 100644 --- a/.pipelines/diabetes_regression-ci-build-train.yml +++ b/.pipelines/diabetes_regression-ci-build-train.yml @@ -62,22 +62,12 @@ stages: echo "##vso[task.setvariable variable=AMLPIPELINEID;isOutput=true]$AMLPIPELINEID" name: 'getpipelineid' displayName: 'Get Pipeline ID' - - bash: | - # Generate a hyperparameter value as a random number between 0 and 1. - # A random value is used here to make the Azure ML dashboards "interesting" when testing - # the solution sample. 
- alpha=$(printf "0.%03d\n" $((($RANDOM*1000)/32767))) - echo "Alpha: $alpha" - echo "##vso[task.setvariable variable=ALPHA;isOutput=true]$alpha" - name: 'getalpha' - displayName: 'Generate random value for hyperparameter alpha' - job: "Run_ML_Pipeline" dependsOn: "Get_Pipeline_ID" displayName: "Trigger ML Training Pipeline" pool: server variables: AMLPIPELINE_ID: $[ dependencies.Get_Pipeline_ID.outputs['getpipelineid.AMLPIPELINEID'] ] - ALPHA: $[ dependencies.Get_Pipeline_ID.outputs['getalpha.ALPHA'] ] steps: - task: ms-air-aiagility.vss-services-azureml.azureml-restApi-task.MLPublishedPipelineRestAPITask@0 displayName: 'Invoke ML pipeline' @@ -85,7 +75,7 @@ stages: azureSubscription: '$(WORKSPACE_SVC_CONNECTION)' PipelineId: '$(AMLPIPELINE_ID)' ExperimentName: '$(EXPERIMENT_NAME)' - PipelineParameters: '"ParameterAssignments": {"model_name": "$(MODEL_NAME)", "hyperparameter_alpha": "$(ALPHA)"}' + PipelineParameters: '"ParameterAssignments": {"model_name": "$(MODEL_NAME)"}' - job: "Training_Run_Report" dependsOn: "Run_ML_Pipeline" condition: always() diff --git a/diabetes_regression/training/train.json b/diabetes_regression/training/train.json new file mode 100644 index 00000000..3158cd1b --- /dev/null +++ b/diabetes_regression/training/train.json @@ -0,0 +1,3 @@ +{ + "alpha": 0.4 +} diff --git a/diabetes_regression/training/train.py b/diabetes_regression/training/train.py index f56daa99..67d6059b 100644 --- a/diabetes_regression/training/train.py +++ b/diabetes_regression/training/train.py @@ -32,6 +32,7 @@ from sklearn.metrics import mean_squared_error from sklearn.model_selection import train_test_split from sklearn.externals import joblib +import json def train_model(run, data, alpha): @@ -46,6 +47,10 @@ def train_model(run, data, alpha): preds, data["test"]["y"]), description="Mean squared error metric") return reg +def get_model_parameters(): + with open("train.json") as f: + data=json.load(f) + return data def main(): print("Running train.py") @@ -62,13 
+67,6 @@ def main(): help="Name of the Model", default="sklearn_regression_model.pkl", ) - parser.add_argument( - "--alpha", - type=float, - default=0.5, - help=("Ridge regression regularization strength hyperparameter; " - "must be a positive float.") - ) parser.add_argument( "--dataset_name", @@ -79,14 +77,21 @@ def main(): print("Argument [build_id]: %s" % args.build_id) print("Argument [model_name]: %s" % args.model_name) - print("Argument [alpha]: %s" % args.alpha) print("Argument [dataset_name]: %s" % args.dataset_name) model_name = args.model_name build_id = args.build_id - alpha = args.alpha dataset_name = args.dataset_name + print("Getting training parameters") + + pars = get_model_parameters() + alpha = pars.get("alpha") + if alpha is None: + alpha = 0.5 + + print("Parameter alpha: %s" % alpha) + run = Run.get_context() ws = run.experiment.workspace diff --git a/ml_service/pipelines/diabetes_regression_build_train_pipeline.py b/ml_service/pipelines/diabetes_regression_build_train_pipeline.py index 66913420..3676b2d6 100644 --- a/ml_service/pipelines/diabetes_regression_build_train_pipeline.py +++ b/ml_service/pipelines/diabetes_regression_build_train_pipeline.py @@ -48,8 +48,6 @@ def main(): name="model_name", default_value=e.model_name) build_id_param = PipelineParameter( name="build_id", default_value=e.build_id) - hyperparameter_alpha_param = PipelineParameter( - name="hyperparameter_alpha", default_value=0.5) dataset_name = "" if (e.datastore_name is not None and e.datafile_name is not None): @@ -70,7 +68,6 @@ def main(): arguments=[ "--build_id", build_id_param, "--model_name", model_name_param, - "--alpha", hyperparameter_alpha_param, "--dataset_name", dataset_name, ], runconfig=run_config, From aef04872111323f02295b4d867cebd71f214f297 Mon Sep 17 00:00:00 2001 From: Sergii Baidachnyi Date: Fri, 31 Jan 2020 12:24:46 -0800 Subject: [PATCH 2/9] linting --- diabetes_regression/training/train.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) 
diff --git a/diabetes_regression/training/train.py b/diabetes_regression/training/train.py index 67d6059b..a6fa1d6a 100644 --- a/diabetes_regression/training/train.py +++ b/diabetes_regression/training/train.py @@ -47,11 +47,13 @@ def train_model(run, data, alpha): preds, data["test"]["y"]), description="Mean squared error metric") return reg + def get_model_parameters(): with open("train.json") as f: - data=json.load(f) + data = json.load(f) return data + def main(): print("Running train.py") From 6d1388108beea0ad99f5c8d443edd0b61129da00 Mon Sep 17 00:00:00 2001 From: Sergii Baidachnyi Date: Fri, 31 Jan 2020 18:28:18 -0800 Subject: [PATCH 3/9] update config --- diabetes_regression/config.json | 6 ++++++ diabetes_regression/training/train.json | 3 --- diabetes_regression/training/train.py | 9 +++++---- 3 files changed, 11 insertions(+), 7 deletions(-) create mode 100644 diabetes_regression/config.json delete mode 100644 diabetes_regression/training/train.json diff --git a/diabetes_regression/config.json b/diabetes_regression/config.json new file mode 100644 index 00000000..a7b4bc1c --- /dev/null +++ b/diabetes_regression/config.json @@ -0,0 +1,6 @@ +{ + "training": + { + "alpha": 0.4 + } +} diff --git a/diabetes_regression/training/train.json b/diabetes_regression/training/train.json deleted file mode 100644 index 3158cd1b..00000000 --- a/diabetes_regression/training/train.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "alpha": 0.4 -} diff --git a/diabetes_regression/training/train.py b/diabetes_regression/training/train.py index a6fa1d6a..a2953ceb 100644 --- a/diabetes_regression/training/train.py +++ b/diabetes_regression/training/train.py @@ -49,7 +49,7 @@ def train_model(run, data, alpha): def get_model_parameters(): - with open("train.json") as f: + with open("../config.json") as f: data = json.load(f) return data @@ -88,9 +88,10 @@ def main(): print("Getting training parameters") pars = get_model_parameters() - alpha = pars.get("alpha") - if alpha is None: - alpha = 
0.5 + try: + alpha=pars["training"]["alpha"] + except KeyError: + alpha=0.5 print("Parameter alpha: %s" % alpha) From 00e11b52b82af3c5f4ce0bc114c7b5589d0d9e33 Mon Sep 17 00:00:00 2001 From: Sergii Baidachnyi Date: Fri, 31 Jan 2020 18:37:59 -0800 Subject: [PATCH 4/9] linting --- diabetes_regression/training/train.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/diabetes_regression/training/train.py b/diabetes_regression/training/train.py index a2953ceb..b2c0196c 100644 --- a/diabetes_regression/training/train.py +++ b/diabetes_regression/training/train.py @@ -89,9 +89,9 @@ def main(): pars = get_model_parameters() try: - alpha=pars["training"]["alpha"] + alpha = pars["training"]["alpha"] except KeyError: - alpha=0.5 + alpha = 0.5 print("Parameter alpha: %s" % alpha) From 82743b256ce34c4d339fde548f2f48b6ea4ef577 Mon Sep 17 00:00:00 2001 From: Sergii Baidachnyi Date: Fri, 31 Jan 2020 18:53:53 -0800 Subject: [PATCH 5/9] move get_model_parameters to helper --- diabetes_regression/config.json | 8 ++++++++ diabetes_regression/training/train.py | 8 +------- diabetes_regression/util/model_helper.py | 15 +++++++++++++++ 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/diabetes_regression/config.json b/diabetes_regression/config.json index a7b4bc1c..859fd84d 100644 --- a/diabetes_regression/config.json +++ b/diabetes_regression/config.json @@ -2,5 +2,13 @@ "training": { "alpha": 0.4 + }, + "evaluation": + { + + }, + "scoring": + { + } } diff --git a/diabetes_regression/training/train.py b/diabetes_regression/training/train.py index b2c0196c..f37f5a2a 100644 --- a/diabetes_regression/training/train.py +++ b/diabetes_regression/training/train.py @@ -32,7 +32,7 @@ from sklearn.metrics import mean_squared_error from sklearn.model_selection import train_test_split from sklearn.externals import joblib -import json +from util.model_helper import get_model_parameters def train_model(run, data, alpha): @@ -48,12 +48,6 @@ def train_model(run, data, 
alpha): return reg -def get_model_parameters(): - with open("../config.json") as f: - data = json.load(f) - return data - - def main(): print("Running train.py") diff --git a/diabetes_regression/util/model_helper.py b/diabetes_regression/util/model_helper.py index 98df0bb8..5bd8d923 100644 --- a/diabetes_regression/util/model_helper.py +++ b/diabetes_regression/util/model_helper.py @@ -4,6 +4,21 @@ from azureml.core import Run from azureml.core import Workspace from azureml.core.model import Model as AMLModel +import json + + +def get_model_parameters(): + """ + Getting parameters from config.json + Parameters: + None + + Return: + a dictionary from config.json + """ + with open("../config.json") as f: + data = json.load(f) + return data def get_current_workspace() -> Workspace: From 96ad87af7692b4b11c9aa8b4cfd8a87a939fecda Mon Sep 17 00:00:00 2001 From: Sergii Baidachnyi Date: Fri, 31 Jan 2020 19:02:29 -0800 Subject: [PATCH 6/9] cannot move to util due to unit tests --- diabetes_regression/training/train.py | 5 +++-- diabetes_regression/util/model_helper.py | 15 --------------- 2 files changed, 3 insertions(+), 17 deletions(-) diff --git a/diabetes_regression/training/train.py b/diabetes_regression/training/train.py index f37f5a2a..c25fa61f 100644 --- a/diabetes_regression/training/train.py +++ b/diabetes_regression/training/train.py @@ -32,7 +32,7 @@ from sklearn.metrics import mean_squared_error from sklearn.model_selection import train_test_split from sklearn.externals import joblib -from util.model_helper import get_model_parameters +import json def train_model(run, data, alpha): @@ -81,7 +81,8 @@ def main(): print("Getting training parameters") - pars = get_model_parameters() + with open("../config.json") as f: + pars = json.load(f) try: alpha = pars["training"]["alpha"] except KeyError: diff --git a/diabetes_regression/util/model_helper.py b/diabetes_regression/util/model_helper.py index 5bd8d923..98df0bb8 100644 --- 
a/diabetes_regression/util/model_helper.py +++ b/diabetes_regression/util/model_helper.py @@ -4,21 +4,6 @@ from azureml.core import Run from azureml.core import Workspace from azureml.core.model import Model as AMLModel -import json - - -def get_model_parameters(): - """ - Getting parameters from config.json - Parameters: - None - - Return: - a dictionary from config.json - """ - with open("../config.json") as f: - data = json.load(f) - return data def get_current_workspace() -> Workspace: From 98a7d732409d124b313fc15b61095d2a67e7ca1a Mon Sep 17 00:00:00 2001 From: Sergii Baidachnyi Date: Fri, 31 Jan 2020 19:26:08 -0800 Subject: [PATCH 7/9] documentation --- docs/getting_started.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/getting_started.md b/docs/getting_started.md index 8b3167e4..cebe1b2c 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -86,6 +86,8 @@ For instructions on how to set up a local development environment, refer to the For using Azure DevOps Pipelines all other variables are stored in the file `.pipelines/diabetes_regression-variables.yml`. Using the default values as a starting point, adjust the variables to suit your requirements. +**Note:** In `diabetes_regression` folder you can find `config.json` file that we would recommend to use in order to provide parameters for training, evaluation and scoring scripts. An example of a such parameter is a hyperparameter of a training algorithm: in our case it's the ridge regression [*alpha* hyperparameter](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Ridge.html). 
+ Up until now you should have: * Forked (or cloned) the repo @@ -120,7 +122,7 @@ Check out the newly created resources in the [Azure Portal](portal.azure.com): (Optional) To remove the resources created for this project you can use the [/environment_setup/iac-remove-environment.yml](../environment_setup/iac-remove-environment.yml) definition or you can just delete the resource group in the [Azure Portal](portal.azure.com). **Note:** The training ML pipeline uses a [sample diabetes dataset](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html) as training data. If you want to use your own dataset, you need to [create and register a datastore](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-access-data#azure-machine-learning-studio) in your ML workspace and upload the datafile (e.g. [diabetes.csv](./data/diabetes.csv)) to the corresponding blob container. You can also define a datastore in the ML Workspace with [az cli](https://docs.microsoft.com/en-us/cli/azure/ext/azure-cli-ml/ml/datastore?view=azure-cli-latest#ext-azure-cli-ml-az-ml-datastore-attach-blob). -You'll also need to configure DATASTORE_NAME and DATAFILE_NAME variables in ***devopsforai-aml-vg*** variable group. +You'll also need to configure DATASTORE_NAME and DATAFILE_NAME variables in ***devopsforai-aml-vg*** variable group. ## Create an Azure DevOps Azure ML Workspace Service Connection @@ -187,7 +189,7 @@ specified). **Note:** If the model evaluation determines that the new model does not perform better than the previous one then the new model will not be registered and the pipeline will be cancelled. * The third stage of the pipeline, **Deploy to ACI**, deploys the model to the QA environment in [Azure Container Instances](https://azure.microsoft.com/en-us/services/container-instances/). It then runs a *smoke test* to validate the deployment, i.e. 
sends a sample query to the scoring web service and verifies that it returns a response in the expected format. - + Wait until the pipeline finishes and verify that there is a new model in the **ML Workspace**: ![trained model](./images/trained-model.png) @@ -247,7 +249,6 @@ Make sure your webapp has the credentials to pull the image from the Azure Conta * The provided pipeline definition YAML file is a sample starting point, which you should tailor to your processes and environment. * You should edit the pipeline definition to remove unused stages. For example, if you are deploying to ACI and AKS, you should delete the unused `Deploy_Webapp` stage. -* The sample pipeline generates a random value for a model hyperparameter (ridge regression [*alpha*](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Ridge.html)) to generate 'interesting' charts when testing the sample. In a real application you should use fixed hyperparameter values. You can [tune hyperparameter values using Azure ML](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-tune-hyperparameters), and manage their values in Azure DevOps Variable Groups. * You may wish to enable [manual approvals](https://docs.microsoft.com/en-us/azure/devops/pipelines/process/approvals) before the deployment stages. * You can install additional Conda or pip packages by modifying the YAML environment configurations under the `diabetes_regression` directory. Make sure to use fixed version numbers for all packages to ensure reproducibility, and use the same versions across environments. 
* You can explore aspects of model observability in the solution, such as: From e76f89de07932deeb2d1c70eae7d1bdeeb224812 Mon Sep 17 00:00:00 2001 From: Sergii Baidachnyi Date: Fri, 31 Jan 2020 19:28:12 -0800 Subject: [PATCH 8/9] more doc --- docs/getting_started.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/getting_started.md b/docs/getting_started.md index cebe1b2c..1d75bc05 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -86,7 +86,7 @@ For instructions on how to set up a local development environment, refer to the For using Azure DevOps Pipelines all other variables are stored in the file `.pipelines/diabetes_regression-variables.yml`. Using the default values as a starting point, adjust the variables to suit your requirements. -**Note:** In `diabetes_regression` folder you can find `config.json` file that we would recommend to use in order to provide parameters for training, evaluation and scoring scripts. An example of a such parameter is a hyperparameter of a training algorithm: in our case it's the ridge regression [*alpha* hyperparameter](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Ridge.html). +**Note:** In the `diabetes_regression` folder you can find a `config.json` file that we recommend using to provide parameters for the training, evaluation and scoring scripts. An example of such a parameter is a hyperparameter of the training algorithm: in our case it's the ridge regression [*alpha* hyperparameter](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Ridge.html). We don't provide any special serializers for this config file, so it's up to you to decide which structure to use there. Up until now you should have: From f8af1971ce7af5b7ed9a51ea7d76103192cb4d72 Mon Sep 17 00:00:00 2001 From: Sergii Baidachnyi Date: Fri, 31 Jan 2020 19:52:54 -0800 Subject: [PATCH 9/9] hm. it's executing from diabetes_regression root. 
--- diabetes_regression/training/train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/diabetes_regression/training/train.py b/diabetes_regression/training/train.py index c25fa61f..fcec4f65 100644 --- a/diabetes_regression/training/train.py +++ b/diabetes_regression/training/train.py @@ -81,7 +81,7 @@ def main(): print("Getting training parameters") - with open("../config.json") as f: + with open("config.json") as f: pars = json.load(f) try: alpha = pars["training"]["alpha"]