Skip to content

Commit

Permalink
Simplified build with Azure CLI; various bugfixes
Browse files Browse the repository at this point in the history
* Use Azure CLI tasks to remove need for variables SP_APP_ID, SP_APP_SECRET, SUBSCRIPTION_ID, TENANT_ID
* Updated getting_started.md to point out variable path to artifact (microsoft#117)
* Made random hyperparameter (ridge regression alpha) a pipeline parameter, setting its value in the DevOps pipeline (microsoft#107)
* Changed unit test to test actual training code (partially solves microsoft#74)
* Fixed: attach_compute should set exit code after exception (microsoft#113)
* Fixed scoring endpoint HTTP behavior (microsoft#110)
* Fixed PipelineParameters format in call to Azure ML Extension (microsoft#118)
* Fixed Model build environment for Azure Web App for containers (microsoft#119)
  • Loading branch information
algattik authored Nov 28, 2019
1 parent f69a2ca commit 9860764
Show file tree
Hide file tree
Showing 20 changed files with 358 additions and 313 deletions.
1 change: 1 addition & 0 deletions .pipelines/azdo-base-pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ steps:
enabled: 'true'

- script: |
pip install --user -r $(Build.SourcesDirectory)/tests/requirements.txt
pytest --junitxml=$(Build.BinariesDirectory)/unit-testresults.xml $(Build.SourcesDirectory)/tests/unit
displayName: 'Run unit tests'
enabled: 'true'
Expand Down
66 changes: 43 additions & 23 deletions .pipelines/azdo-ci-build-train.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@ trigger:
exclude:
- docs/
- environment_setup/
- charts/
- ml_service/util/create_scoring_image.py
- ml_service/util/create_scoring_image.*

variables:
- template: azdo-variables.yml
Expand All @@ -27,12 +26,15 @@ stages:
timeoutInMinutes: 0
steps:
- template: azdo-base-pipeline.yml
- script: |
# Invoke the Python script that builds and publishes a training pipeline
python3 $(Build.SourcesDirectory)/ml_service/pipelines/${{ variables.BUILD_TRAIN_SCRIPT }}
failOnStderr: 'false'
env:
SP_APP_SECRET: '$(SP_APP_SECRET)'
- task: AzureCLI@1
inputs:
azureSubscription: '$(WORKSPACE_SVC_CONNECTION)'
scriptLocation: inlineScript
inlineScript: |
set -e # fail on error
export SUBSCRIPTION_ID=$(az account show --query id -o tsv)
# Invoke the Python script that builds and publishes a training pipeline
python $(Build.SourcesDirectory)/ml_service/pipelines/${{ variables.BUILD_TRAIN_SCRIPT }}
displayName: 'Publish Azure Machine Learning Pipeline'
- stage: 'Trigger_AML_Pipeline'
displayName: 'Train, evaluate, register model via previously published AML pipeline'
Expand All @@ -45,30 +47,42 @@ stages:
container: mcr.microsoft.com/mlops/python:latest
timeoutInMinutes: 0
steps:
- script: |
python $(Build.SourcesDirectory)/ml_service/pipelines/run_train_pipeline.py
# Set AMLPIPELINEID variable for next AML Pipeline task in next job
source $(Build.SourcesDirectory)/tmp.sh
echo "##vso[task.setvariable variable=AMLPIPELINEID;isOutput=true]$AMLPIPELINE_ID"
rm $(Build.SourcesDirectory)/tmp.sh
- task: AzureCLI@1
inputs:
azureSubscription: '$(WORKSPACE_SVC_CONNECTION)'
scriptLocation: inlineScript
inlineScript: |
set -e # fail on error
export SUBSCRIPTION_ID=$(az account show --query id -o tsv)
python $(Build.SourcesDirectory)/ml_service/pipelines/run_train_pipeline.py --output_pipeline_id_file "pipeline_id.txt" --skip_train_execution
# Set AMLPIPELINEID variable for next AML Pipeline task in next job
echo "##vso[task.setvariable variable=AMLPIPELINEID;isOutput=true]$(cat pipeline_id.txt)"
name: 'getpipelineid'
displayName: 'Get Pipeline ID'
env:
SP_APP_SECRET: '$(SP_APP_SECRET)'
- bash: |
# Generate a hyperparameter value as a random number between 0 and 1.
# A random value is used here to make the Azure ML dashboards "interesting" when testing
# the solution sample.
alpha=$(printf "0.%03d\n" $((($RANDOM*1000)/32767)))
echo "Alpha: $alpha"
echo "##vso[task.setvariable variable=ALPHA;isOutput=true]$alpha"
name: 'getalpha'
displayName: 'Generate random value for hyperparameter alpha'
- job: "Run_ML_Pipeline"
dependsOn: "Get_Pipeline_ID"
displayName: "Trigger ML Training Pipeline"
pool: server
variables:
AMLPIPELINE_ID: $[ dependencies.Get_Pipeline_ID.outputs['getpipelineid.AMLPIPELINEID'] ]
ALPHA: $[ dependencies.Get_Pipeline_ID.outputs['getalpha.ALPHA'] ]
steps:
- task: ms-air-aiagility.vss-services-azureml.azureml-restApi-task.MLPublishedPipelineRestAPITask@0
displayName: 'Invoke ML pipeline'
inputs:
azureSubscription: '$(WORKSPACE_SVC_CONNECTION)'
PipelineId: '$(AMLPIPELINE_ID)'
ExperimentName: '$(EXPERIMENT_NAME)'
PipelineParameters: '"model_name": "sklearn_regression_model.pkl"'
PipelineParameters: '"ParameterAssignments": {"model_name": "$(MODEL_NAME)", "hyperparameter_alpha": "$(ALPHA)"}'
- job: "Training_Run_Report"
dependsOn: "Run_ML_Pipeline"
displayName: "Determine if evaluation succeeded and new model is registered"
Expand All @@ -77,22 +91,28 @@ stages:
container: mcr.microsoft.com/mlops/python:latest
timeoutInMinutes: 0
steps:
- script: |
python $(Build.SourcesDirectory)/code/register/register_model.py --build_id $(Build.BuildId) --validate True
displayName: 'Check if new model registered'
env:
SP_APP_SECRET: '$(SP_APP_SECRET)'
- task: AzureCLI@1
inputs:
azureSubscription: '$(WORKSPACE_SVC_CONNECTION)'
scriptLocation: inlineScript
inlineScript: |
set -e # fail on error
export SUBSCRIPTION_ID=$(az account show --query id -o tsv)
python $(Build.SourcesDirectory)/ml_service/pipelines/verify_train_pipeline.py --build_id $(Build.BuildId) --model_name "$(MODEL_NAME)" --output_model_version_file "model_version.txt"
echo "##vso[task.setvariable variable=MODEL_VERSION;isOutput=true]$(cat model_version.txt)"
displayName: "Determine if evaluation succeeded and new model is registered"
- task: CopyFiles@2
displayName: 'Copy Files to: $(Build.ArtifactStagingDirectory)'
inputs:
SourceFolder: '$(Build.SourcesDirectory)'
TargetFolder: '$(Build.ArtifactStagingDirectory)'
Contents: |
code/scoring/**
ml_service/util/**
- task: PublishBuildArtifacts@1
displayName: 'Publish Artifact'
inputs:
ArtifactName: 'mlops-pipelines'
publishLocation: 'container'
pathtoPublish: '$(Build.ArtifactStagingDirectory)'
TargetPath: '$(Build.ArtifactStagingDirectory)'
TargetPath: '$(Build.ArtifactStagingDirectory)'
8 changes: 1 addition & 7 deletions code/evaluate/evaluate_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
import sys
from azureml.core import Run, Workspace, Experiment
import argparse
from azureml.core.authentication import ServicePrincipalAuthentication
import traceback

run = Run.get_context()
Expand All @@ -48,16 +47,11 @@
build_id = os.environ.get('BUILD_BUILDID')
# run_id useful to query previous runs
run_id = "57fee47f-5ae8-441c-bc0c-d4c371f32d70"
service_principal = ServicePrincipalAuthentication(
tenant_id=tenant_id,
service_principal_id=app_id,
service_principal_password=app_secret)

aml_workspace = Workspace.get(
name=workspace_name,
subscription_id=subscription_id,
resource_group=resource_group,
auth=service_principal
resource_group=resource_group
)
ws = aml_workspace
exp = Experiment(ws, experiment_name)
Expand Down
48 changes: 5 additions & 43 deletions code/register/register_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,45 +29,30 @@
import traceback
from azureml.core import Run, Experiment, Workspace
from azureml.core.model import Model as AMLModel
from azureml.core.authentication import ServicePrincipalAuthentication


def main():

run = Run.get_context()
if (run.id.startswith('OfflineRun')):
from dotenv import load_dotenv
sys.path.append(os.path.abspath("./code/util")) # NOQA: E402
from model_helper import get_model_by_tag
# For local development, set values in this section
load_dotenv()
workspace_name = os.environ.get("WORKSPACE_NAME")
experiment_name = os.environ.get("EXPERIMENT_NAME")
resource_group = os.environ.get("RESOURCE_GROUP")
subscription_id = os.environ.get("SUBSCRIPTION_ID")
tenant_id = os.environ.get("TENANT_ID")
model_name = os.environ.get("MODEL_NAME")
app_id = os.environ.get('SP_APP_ID')
app_secret = os.environ.get('SP_APP_SECRET')
build_id = os.environ.get('BUILD_BUILDID')
# run_id useful to query previous runs
run_id = "bd184a18-2ac8-4951-8e78-e290bef3b012"
service_principal = ServicePrincipalAuthentication(
tenant_id=tenant_id,
service_principal_id=app_id,
service_principal_password=app_secret)

aml_workspace = Workspace.get(
name=workspace_name,
subscription_id=subscription_id,
resource_group=resource_group,
auth=service_principal
resource_group=resource_group
)
ws = aml_workspace
exp = Experiment(ws, experiment_name)
else:
sys.path.append(os.path.abspath("./util")) # NOQA: E402
from model_helper import get_model_by_tag
ws = run.experiment.workspace
exp = run.experiment
run_id = 'amlcompute'
Expand All @@ -89,12 +74,6 @@ def main():
help="Name of the Model",
default="sklearn_regression_model.pkl",
)
parser.add_argument(
"--validate",
type=str,
help="Set to true to only validate if model is registered for run",
default=False,
)

args = parser.parse_args()
if (args.build_id is not None):
Expand All @@ -103,30 +82,13 @@ def main():
run_id = args.run_id
if (run_id == 'amlcompute'):
run_id = run.parent.id
if (args.validate is not None):
validate = args.validate
model_name = args.model_name

if (validate):
try:
tag_name = 'BuildId'
model = get_model_by_tag(
model_name, tag_name, build_id, exp.workspace)
if (model is not None):
print("Model was registered for this build.")
if (model is None):
print("Model was not registered for this run.")
sys.exit(1)
except Exception as e:
print(e)
print("Model was not registered for this run.")
sys.exit(1)
if (build_id is None):
register_aml_model(model_name, exp, run_id)
else:
if (build_id is None):
register_aml_model(model_name, exp, run_id)
else:
run.tag("BuildId", value=build_id)
register_aml_model(model_name, exp, run_id, build_id)
run.tag("BuildId", value=build_id)
register_aml_model(model_name, exp, run_id, build_id)


def model_already_registered(model_name, exp, run_id):
Expand Down
12 changes: 4 additions & 8 deletions code/scoring/score.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,14 +39,10 @@ def init():


def run(raw_data):
try:
data = json.loads(raw_data)["data"]
data = numpy.array(data)
result = model.predict(data)
return json.dumps({"result": result.tolist()})
except Exception as e:
result = str(e)
return json.dumps({"error": result})
data = json.loads(raw_data)["data"]
data = numpy.array(data)
result = model.predict(data)
return {"result": result.tolist()}


if __name__ == "__main__":
Expand Down
Loading

0 comments on commit 9860764

Please sign in to comment.