Skip to content

Commit

Permalink
Updated repo to use the SDK more consistently (#67)
Browse files Browse the repository at this point in the history
* Updated code to use SDK instead of JSON files

* include location in environment setup

* docs updates

* Update docker-image-pipeline.yml for Azure Pipelines

* Update docker-image-pipeline.yml for Azure Pipelines
  • Loading branch information
tarockey authored and eedorenko committed Sep 13, 2019
1 parent f1d85cc commit eceb936
Show file tree
Hide file tree
Showing 13 changed files with 180 additions and 170 deletions.
35 changes: 23 additions & 12 deletions .env.example
Original file line number Diff line number Diff line change
@@ -1,32 +1,43 @@
# Azure Subscription Variables
WORKSPACE_NAME = ''
RESOURCE_GROUP = ''
SUBSCRIPTION_ID = ''
LOCATION = ''
TENANT_ID = ''
BASE_NAME = ''
SP_APP_ID = ''
SP_APP_SECRET = ''

# Mock build/release ID for local testing - update ReleaseID each "release"
BUILD_BUILDID = '001'
RELEASE_RELEASEID = '001'

# Azure ML Workspace Variables
EXPERIMENT_NAME = ''
SCRIPT_FOLDER = './'
BLOB_STORE_NAME = ''
# Remote VM Config
REMOTE_VM_NAME = ''
REMOTE_VM_USERNAME = ''
REMOTE_VM_PASSWORD = ''
REMOTE_VM_IP = ''

# AML Compute Cluster Config
AML_CLUSTER_NAME = ''
AML_CLUSTER_VM_SIZE = ''
AML_COMPUTE_CLUSTER_NAME = ''
AML_COMPUTE_CLUSTER_CPU_SKU = ''
AML_CLUSTER_MAX_NODES = ''
AML_CLUSTER_MIN_NODES = ''
AML_CLUSTER_PRIORITY = 'lowpriority'
# Training Config
MODEL_NAME = ''
MODEL_VERSION = ''
MODEL_NAME = 'sklearn_regression_model.pkl'
MODEL_VERSION = '1'
TRAIN_SCRIPT_PATH = 'training/train.py'
# AML Pipeline Config
TRAINING_PIPELINE_NAME = ''
PIPELINE_CONDA_PATH = 'aml_config/conda_dependencies.yml'
MODEL_PATH = ''
EVALUATE_SCRIPT_PATH = 'evaluate/evaluate_model.py'
REGISTER_SCRIPT_PATH = 'register/register_model.py'
SOURCES_DIR_TRAIN = 'code'

# These are not mandatory for the core workflow
# Remote VM Config
REMOTE_VM_NAME = ''
REMOTE_VM_USERNAME = ''
REMOTE_VM_PASSWORD = ''
REMOTE_VM_IP = ''
# Image config
IMAGE_NAME = ''
IMAGE_DESCRIPTION = ''
Expand Down
4 changes: 2 additions & 2 deletions .pipelines/azdo-ci-build-train.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ steps:
failOnStderr: 'false'
env:
SP_APP_SECRET: '$(SP_APP_SECRET)'
displayName: 'Train model using AML with Remote Compute'
displayName: 'Publish Azure Machine Learning Pipeline'
enabled: 'true'

- task: CopyFiles@2
Expand All @@ -32,7 +32,7 @@ steps:
SourceFolder: '$(Build.SourcesDirectory)'
TargetFolder: '$(Build.ArtifactStagingDirectory)'
Contents: |
ml_service/pipelines/?(run_train_pipeline.py|*.json)
ml_service/pipelines/?(run_train_pipeline.py|*.json)
code/scoring/**
Expand Down
62 changes: 25 additions & 37 deletions code/evaluate/evaluate_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,53 +24,45 @@
POSSIBILITY OF SUCH DAMAGE.
"""
import os
import json
from azureml.core.model import Model
from azureml.core import Run
from azureml.core import Model, Run
import argparse


# Get workspace
# ws = Workspace.from_config()
run = Run.get_context()
exp = run.experiment
ws = run.experiment.workspace


parser = argparse.ArgumentParser("evaluate")
parser.add_argument(
"--config_suffix", type=str, help="Datetime suffix for json config files"
"--release_id",
type=str,
help="The ID of the release triggering this pipeline run",
)
parser.add_argument(
"--json_config",
"--model_name",
type=str,
help="Directory to write all the intermediate json configs",
help="Name of the Model",
default="sklearn_regression_model.pkl",
)
args = parser.parse_args()

print("Argument 1: %s" % args.config_suffix)
print("Argument 2: %s" % args.json_config)
print("Argument 1: %s" % args.release_id)
print("Argument 2: %s" % args.model_name)
model_name = args.model_name
release_id = args.release_id

if not (args.json_config is None):
os.makedirs(args.json_config, exist_ok=True)
print("%s created" % args.json_config)
# Parameterize the metrics on which the models should be compared
# Add golden data set on which all the model performance can be evaluated

# Get the latest run_id
# with open("aml_config/run_id.json") as f:
# config = json.load(f)

train_run_id_json = "run_id_{}.json".format(args.config_suffix)
train_output_path = os.path.join(args.json_config, train_run_id_json)
with open(train_output_path) as f:
config = json.load(f)


new_model_run_id = config["run_id"] # args.train_run_id
experiment_name = config["experiment_name"]
# exp = Experiment(workspace=ws, name=experiment_name)

all_runs = exp.get_runs(
properties={"release_id": release_id, "run_type": "train"},
include_children=True
)
new_model_run = next(all_runs)
new_model_run_id = new_model_run.id
print(f'New Run found with Run ID of: {new_model_run_id}')

try:
# Get most recently registered model, we assume that
Expand Down Expand Up @@ -110,16 +102,12 @@
print("This is the first model to be trained, \
thus nothing to evaluate for now")

run_id = {}
run_id["run_id"] = ""

# Writing the run id to /aml_config/run_id.json
if promote_new_model:
run_id["run_id"] = new_model_run_id
# register new model
# new_model_run.register_model(model_name='',model_path='outputs/sklearn_regression_model.pkl')

run_id["experiment_name"] = experiment_name
filename = "run_id_{}.json".format(args.config_suffix)
output_path = os.path.join(args.json_config, filename)
with open(output_path, "w") as outfile:
json.dump(run_id, outfile)
model_path = os.path.join('outputs', model_name)
new_model_run.register_model(
model_name=model_name,
model_path=model_path,
properties={"release_id": release_id})
print("Registered new model!")
27 changes: 8 additions & 19 deletions code/training/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,17 +32,13 @@
from sklearn.model_selection import train_test_split
from sklearn.externals import joblib
import numpy as np
import json


parser = argparse.ArgumentParser("train")
parser.add_argument(
"--config_suffix", type=str, help="Datetime suffix for json config files"
)
parser.add_argument(
"--json_config",
"--release_id",
type=str,
help="Directory to write all the intermediate json configs",
help="The ID of the release triggering this pipeline run",
)
parser.add_argument(
"--model_name",
Expand All @@ -53,14 +49,11 @@

args = parser.parse_args()

print("Argument 1: %s" % args.config_suffix)
print("Argument 2: %s" % args.json_config)
print("Argument 1: %s" % args.release_id)
print("Argument 2: %s" % args.model_name)

model_name = args.model_name

if not (args.json_config is None):
os.makedirs(args.json_config, exist_ok=True)
print("%s created" % args.json_config)
release_id = args.release_id

run = Run.get_context()
exp = run.experiment
Expand Down Expand Up @@ -102,12 +95,8 @@
print("Following files are uploaded ")
print(run.get_file_names())

run_id = {}
run_id["run_id"] = run.id
run_id["experiment_name"] = run.experiment.name
filename = "run_id_{}.json".format(args.config_suffix)
output_path = os.path.join(args.json_config, filename)
with open(output_path, "w") as outfile:
json.dump(run_id, outfile)
# Add properties to identify this specific training run
run.add_properties({"release_id": release_id, "run_type": "train"})
print(f"added properties: {run.properties}")

run.complete()
4 changes: 2 additions & 2 deletions docs/code_description.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@
### Code

- `code/training/train.py` : a training step of an ML training pipeline.
- `code/evaluate/evaluate_model.py` : an evaluating step of an ML training pipeline.
- `code/evaluate/register_model.py` : registers a new trained model if evaluation shows the new model is more performant than the previous one.
- `code/evaluate/evaluate_model.py` : an evaluation step of an ML training pipeline that registers the newly trained model if evaluation shows it is more performant than the previous one.
- `code/register/register_model.py` : (LEGACY) registers a new trained model if evaluation shows the new model is more performant than the previous one.

### Scoring
- `code/scoring/score.py` : a scoring script that is packaged into a Docker image along with a model when it is deployed to the QA/Prod environment.
Expand Down
10 changes: 7 additions & 3 deletions docs/getting_started.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ The variable group should contain the following variables:
| SUBSCRIPTION_ID | |
| TENANT_ID | |
| TRAIN_SCRIPT_PATH | training/train.py |
| TRAINING_PIPELINE_NAME | training-pipeline |

Mark the **SP_APP_SECRET** variable as secret.

Expand Down Expand Up @@ -88,6 +89,7 @@ Check out created resources in the [Azure Portal](portal.azure.com):

Alternatively, you can use a [cleaning pipeline](../environment_setup/iac-remove-environment.yml) that removes the resources created for this project, or you can just delete the resource group in the [Azure Portal](https://portal.azure.com).

Once this resource group is created, be sure that the Service Principal you have created has access to this resource group.

### 6. Set up Build Pipeline

Expand Down Expand Up @@ -127,9 +129,11 @@ Rename the default "Stage 1" to **Invoke Training Pipeline** and make sure that
Add a **Command Line Script** step, rename it to **Run Training Pipeline** with the following script:

```bash
docker run -v $(System.DefaultWorkingDirectory)/_ci-build/mlops-pipelines/ml_service/pipelines:/pipelines \
-w=/pipelines -e MODEL_NAME=$MODEL_NAME -e EXPERIMENT_NAME=$EXPERIMENT_NAME \
-e TENANT_ID=$TENANT_ID -e SP_APP_ID=$SP_APP_ID -e SP_APP_SECRET=$(SP_APP_SECRET) \
docker run -v $(System.DefaultWorkingDirectory)/_ci-build/mlops-pipelines/ml_service/pipelines:/pipelines \
-w=/pipelines -e MODEL_NAME=$MODEL_NAME -e EXPERIMENT_NAME=$EXPERIMENT_NAME \
-e TENANT_ID=$TENANT_ID -e SP_APP_ID=$SP_APP_ID -e SP_APP_SECRET=$(SP_APP_SECRET) \
-e SUBSCRIPTION_ID=$SUBSCRIPTION_ID -e RELEASE_RELEASEID=$RELEASE_RELEASEID \
-e BUILD_BUILDID=$BUILD_BUILDID -e BASE_NAME=$BASE_NAME \
mcr.microsoft.com/mlops/python:latest python run_train_pipeline.py
```

Expand Down
3 changes: 2 additions & 1 deletion environment_setup/arm-templates/cloud-environment.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@
"southeastasia",
"westcentralus",
"westeurope",
"westus2"
"westus2",
"centralus"
],
"metadata": {
"description": "Specifies the location for all resources."
Expand Down
2 changes: 1 addition & 1 deletion environment_setup/iac-create-environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ steps:
location: $(LOCATION)
templateLocation: 'Linked artifact'
csmFile: '$(Build.SourcesDirectory)/environment_setup/arm-templates/cloud-environment.json'
overrideParameters: '-baseName $(BASE_NAME)'
overrideParameters: '-baseName $(BASE_NAME) -location $(LOCATION)'
deploymentMode: 'Incremental'
displayName: 'Deploy MLOps resources to Azure'

Expand Down
1 change: 0 additions & 1 deletion environment_setup/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
pytest==4.3.0
requests>=2.22
azureml>=0.2
azureml-sdk>=1.0
python-dotenv>=0.10.3
flake8
Expand Down
Empty file added ml_service/__init__.py
Empty file.
Loading

0 comments on commit eceb936

Please sign in to comment.