Updated repo to use the SDK more consistently (#67)

* Updated code to use SDK instead of JSON files * include location in environment setup * docs updates * Update docker-image-pipeline.yml for Azure Pipelines * Update docker-image-pipeline.yml for Azure Pipelines
microsoft · Sep 13, 2019 · eceb936 · eceb936
1 parent f1d85cc
commit eceb936
Show file tree

Hide file tree

Showing 13 changed files with 180 additions and 170 deletions.
diff --git a/.env.example b/.env.example
@@ -1,32 +1,43 @@
 # Azure Subscription Variables
-WORKSPACE_NAME = ''
-RESOURCE_GROUP = ''
 SUBSCRIPTION_ID = ''
 LOCATION = ''
 TENANT_ID = ''
+BASE_NAME = ''
+SP_APP_ID = ''
+SP_APP_SECRET = ''
+
+# Mock build/release ID for local testing - update ReleaseID each "release"
+BUILD_BUILDID = '001'
+RELEASE_RELEASEID = '001'
 
 # Azure ML Workspace Variables
 EXPERIMENT_NAME = ''
 SCRIPT_FOLDER = './'
-BLOB_STORE_NAME = ''
-# Remote VM Config
-REMOTE_VM_NAME = ''
-REMOTE_VM_USERNAME = ''
-REMOTE_VM_PASSWORD = ''
-REMOTE_VM_IP = ''
+
 # AML Compute Cluster Config
-AML_CLUSTER_NAME = ''
-AML_CLUSTER_VM_SIZE = ''
+AML_COMPUTE_CLUSTER_NAME = ''
+AML_COMPUTE_CLUSTER_CPU_SKU = ''
 AML_CLUSTER_MAX_NODES = ''
 AML_CLUSTER_MIN_NODES = ''
 AML_CLUSTER_PRIORITY = 'lowpriority'
 # Training Config
-MODEL_NAME = ''
-MODEL_VERSION = ''
+MODEL_NAME = 'sklearn_regression_model.pkl'
+MODEL_VERSION = '1'
+TRAIN_SCRIPT_PATH = 'training/train.py'
 # AML Pipeline Config
 TRAINING_PIPELINE_NAME = ''
 PIPELINE_CONDA_PATH = 'aml_config/conda_dependencies.yml'
 MODEL_PATH = ''
+EVALUATE_SCRIPT_PATH = 'evaluate/evaluate_model.py'
+REGISTER_SCRIPT_PATH = 'register/register_model.py'
+SOURCES_DIR_TRAIN = 'code'
+
+# These are not mandatory for the core workflow
+# Remote VM Config
+REMOTE_VM_NAME = ''
+REMOTE_VM_USERNAME = ''
+REMOTE_VM_PASSWORD = ''
+REMOTE_VM_IP = ''
 # Image config
 IMAGE_NAME = ''
 IMAGE_DESCRIPTION = ''

diff --git a/.pipelines/azdo-ci-build-train.yml b/.pipelines/azdo-ci-build-train.yml
@@ -23,7 +23,7 @@ steps:
   failOnStderr: 'false'
   env:
     SP_APP_SECRET: '$(SP_APP_SECRET)'
-  displayName: 'Train model using AML with Remote Compute'
+  displayName: 'Publish Azure Machine Learning Pipeline'
   enabled: 'true'
 
 - task: CopyFiles@2
@@ -32,7 +32,7 @@ steps:
     SourceFolder: '$(Build.SourcesDirectory)'
     TargetFolder: '$(Build.ArtifactStagingDirectory)'
     Contents: |
-      ml_service/pipelines/?(run_train_pipeline.py|*.json)  
+      ml_service/pipelines/?(run_train_pipeline.py|*.json)
       code/scoring/**
 
 

diff --git a/code/evaluate/evaluate_model.py b/code/evaluate/evaluate_model.py
@@ -24,53 +24,45 @@
 POSSIBILITY OF SUCH DAMAGE.
 """
 import os
-import json
-from azureml.core.model import Model
-from azureml.core import Run
+from azureml.core import Model, Run
 import argparse
 
 
 # Get workspace
-# ws = Workspace.from_config()
 run = Run.get_context()
 exp = run.experiment
 ws = run.experiment.workspace
 
 
 parser = argparse.ArgumentParser("evaluate")
 parser.add_argument(
-    "--config_suffix", type=str, help="Datetime suffix for json config files"
+    "--release_id",
+    type=str,
+    help="The ID of the release triggering this pipeline run",
 )
 parser.add_argument(
-    "--json_config",
+    "--model_name",
     type=str,
-    help="Directory to write all the intermediate json configs",
+    help="Name of the Model",
+    default="sklearn_regression_model.pkl",
 )
 args = parser.parse_args()
 
-print("Argument 1: %s" % args.config_suffix)
-print("Argument 2: %s" % args.json_config)
+print("Argument 1: %s" % args.release_id)
+print("Argument 2: %s" % args.model_name)
+model_name = args.model_name
+release_id = args.release_id
 
-if not (args.json_config is None):
-    os.makedirs(args.json_config, exist_ok=True)
-    print("%s created" % args.json_config)
 # Parameterize the metrics on which the models should be compared
 # Add a golden data set on which every model's performance can be evaluated
 
-# Get the latest run_id
-# with open("aml_config/run_id.json") as f:
-#     config = json.load(f)
-
-train_run_id_json = "run_id_{}.json".format(args.config_suffix)
-train_output_path = os.path.join(args.json_config, train_run_id_json)
-with open(train_output_path) as f:
-    config = json.load(f)
-
-
-new_model_run_id = config["run_id"]  # args.train_run_id
-experiment_name = config["experiment_name"]
-# exp = Experiment(workspace=ws, name=experiment_name)
-
+all_runs = exp.get_runs(
+    properties={"release_id": release_id, "run_type": "train"},
+    include_children=True
+    )
+new_model_run = next(all_runs)
+new_model_run_id = new_model_run.id
+print(f'New Run found with Run ID of: {new_model_run_id}')
 
 try:
     # Get most recently registered model, we assume that
@@ -110,16 +102,12 @@
     print("This is the first model to be trained, \
           thus nothing to evaluate for now")
 
-run_id = {}
-run_id["run_id"] = ""
+
 # Register the newly trained model when it has been selected for promotion
 if promote_new_model:
-    run_id["run_id"] = new_model_run_id
-    # register new model
-    # new_model_run.register_model(model_name='',model_path='outputs/sklearn_regression_model.pkl')
-
-run_id["experiment_name"] = experiment_name
-filename = "run_id_{}.json".format(args.config_suffix)
-output_path = os.path.join(args.json_config, filename)
-with open(output_path, "w") as outfile:
-    json.dump(run_id, outfile)
+    model_path = os.path.join('outputs', model_name)
+    new_model_run.register_model(
+        model_name=model_name,
+        model_path=model_path,
+        properties={"release_id": release_id})
+    print("Registered new model!")
diff --git a/code/training/train.py b/code/training/train.py
@@ -32,17 +32,13 @@
 from sklearn.model_selection import train_test_split
 from sklearn.externals import joblib
 import numpy as np
-import json
 
 
 parser = argparse.ArgumentParser("train")
 parser.add_argument(
-    "--config_suffix", type=str, help="Datetime suffix for json config files"
-)
-parser.add_argument(
-    "--json_config",
+    "--release_id",
     type=str,
-    help="Directory to write all the intermediate json configs",
+    help="The ID of the release triggering this pipeline run",
 )
 parser.add_argument(
     "--model_name",
@@ -53,14 +49,11 @@
 
 args = parser.parse_args()
 
-print("Argument 1: %s" % args.config_suffix)
-print("Argument 2: %s" % args.json_config)
+print("Argument 1: %s" % args.release_id)
+print("Argument 2: %s" % args.model_name)
 
 model_name = args.model_name
-
-if not (args.json_config is None):
-    os.makedirs(args.json_config, exist_ok=True)
-    print("%s created" % args.json_config)
+release_id = args.release_id
 
 run = Run.get_context()
 exp = run.experiment
@@ -102,12 +95,8 @@
 print("Following files are uploaded ")
 print(run.get_file_names())
 
-run_id = {}
-run_id["run_id"] = run.id
-run_id["experiment_name"] = run.experiment.name
-filename = "run_id_{}.json".format(args.config_suffix)
-output_path = os.path.join(args.json_config, filename)
-with open(output_path, "w") as outfile:
-    json.dump(run_id, outfile)
+# Add properties to identify this specific training run
+run.add_properties({"release_id": release_id, "run_type": "train"})
+print(f"added properties: {run.properties}")
 
 run.complete()
diff --git a/docs/code_description.md b/docs/code_description.md
@@ -27,8 +27,8 @@
 ### Code
 
 - `code/training/train.py` : a training step of an ML training pipeline.
-- `code/evaluate/evaluate_model.py` : an evaluating step of an ML training pipeline.
-- `code/evaluate/register_model.py` : registers a new trained model if evaluation shows the new model is more performant than the previous one.
+- `code/evaluate/evaluate_model.py` : an evaluating step of an ML training pipeline which registers a new trained model if evaluation shows the new model is more performant than the previous one.
+- `code/register/register_model.py` : (LEGACY) registers a new trained model if evaluation shows the new model is more performant than the previous one.
 
 ### Scoring
 - `code/scoring/score.py` : a scoring script that is packaged into a Docker image together with a model when deploying to the QA/Prod environment.

diff --git a/docs/getting_started.md b/docs/getting_started.md
@@ -49,6 +49,7 @@ The variable group should contain the following variables:
 | SUBSCRIPTION_ID             |                              |
 | TENANT_ID                   |                              |
 | TRAIN_SCRIPT_PATH           | training/train.py            |
+| TRAINING_PIPELINE_NAME      | training-pipeline            |
 
 Mark **SP_APP_SECRET** variable as a secret one.
 
@@ -88,6 +89,7 @@ Check out created resources in the [Azure Portal](portal.azure.com):
 
 Alternatively, you can use a [cleaning pipeline](../environment_setup/iac-remove-environment.yml) that removes the resources created for this project, or you can simply delete the resource group in the [Azure Portal](https://portal.azure.com).
 
+Once this resource group is created, be sure that the Service Principal you have created has access to this resource group.
 
 ### 6. Set up Build Pipeline
 
@@ -127,9 +129,11 @@ Rename the default "Stage 1" to **Invoke Training Pipeline** and make sure that
 Add a **Command Line Script** step, rename it to **Run Training Pipeline** with the following script:
 
 ```bash
-docker run  -v $(System.DefaultWorkingDirectory)/_ci-build/mlops-pipelines/ml_service/pipelines:/pipelines \
--w=/pipelines -e MODEL_NAME=$MODEL_NAME -e EXPERIMENT_NAME=$EXPERIMENT_NAME \
--e TENANT_ID=$TENANT_ID -e SP_APP_ID=$SP_APP_ID -e SP_APP_SECRET=$(SP_APP_SECRET) \
+docker run -v $(System.DefaultWorkingDirectory)/_ci-build/mlops-pipelines/ml_service/pipelines:/pipelines \
+ -w=/pipelines -e MODEL_NAME=$MODEL_NAME -e EXPERIMENT_NAME=$EXPERIMENT_NAME \
+ -e TENANT_ID=$TENANT_ID -e SP_APP_ID=$SP_APP_ID -e SP_APP_SECRET=$(SP_APP_SECRET) \
+ -e SUBSCRIPTION_ID=$SUBSCRIPTION_ID -e RELEASE_RELEASEID=$RELEASE_RELEASEID \
+ -e BUILD_BUILDID=$BUILD_BUILDID -e BASE_NAME=$BASE_NAME \
 mcr.microsoft.com/mlops/python:latest python run_train_pipeline.py
 ```
 

diff --git a/environment_setup/arm-templates/cloud-environment.json b/environment_setup/arm-templates/cloud-environment.json
@@ -20,7 +20,8 @@
         "southeastasia",
         "westcentralus",
         "westeurope",
-        "westus2"
+        "westus2",
+        "centralus"
       ],
       "metadata": {
         "description": "Specifies the location for all resources."

diff --git a/environment_setup/iac-create-environment.yml b/environment_setup/iac-create-environment.yml
@@ -22,7 +22,7 @@ steps:
     location: $(LOCATION)
     templateLocation: 'Linked artifact'
     csmFile: '$(Build.SourcesDirectory)/environment_setup/arm-templates/cloud-environment.json'
-    overrideParameters: '-baseName $(BASE_NAME)'
+    overrideParameters: '-baseName $(BASE_NAME) -location $(LOCATION)'
     deploymentMode: 'Incremental'
   displayName: 'Deploy MLOps resources to Azure'
 

diff --git a/environment_setup/requirements.txt b/environment_setup/requirements.txt
@@ -1,6 +1,5 @@
 pytest==4.3.0
 requests>=2.22
-azureml>=0.2
 azureml-sdk>=1.0
 python-dotenv>=0.10.3
 flake8

diff --git a/ml_service/__init__.py b/ml_service/__init__.py