Simplified build with Azure CLI; various bugfixes

* Use Azure CLI tasks to remove need for variables SP_APP_ID, SP_APP_SECRET, SUBSCRIPTION_ID, TENANT_ID
* Updated getting_started.md to point out variable path to artifact (microsoft#117)
* Made random hyperparameter (ridge regression alpha) a pipeline parameter, setting its value in the DevOps pipeline (microsoft#107)
* Changed unit test to test actual training code (partially solves microsoft#74)
* Fixed: attach_compute should set exit code after exception (microsoft#113)
* Fixed scoring endpoint HTTP behavior (microsoft#110)
* Fixed PipelineParameters format in call to Azure ML Extension (microsoft#118)
* Fixed Model build environment for Azure Web App for containers (microsoft#119)
algattik · Nov 28, 2019 · 9860764 · 9860764
1 parent f69a2ca
commit 9860764
Show file tree

Hide file tree

Showing 20 changed files with 358 additions and 313 deletions.
diff --git a/.pipelines/azdo-base-pipeline.yml b/.pipelines/azdo-base-pipeline.yml
@@ -10,6 +10,7 @@ steps:
   enabled: 'true'
 
 - script: |
+   pip install --user -r $(Build.SourcesDirectory)/tests/requirements.txt
    pytest --junitxml=$(Build.BinariesDirectory)/unit-testresults.xml $(Build.SourcesDirectory)/tests/unit
   displayName: 'Run unit tests'
   enabled: 'true'

diff --git a/.pipelines/azdo-ci-build-train.yml b/.pipelines/azdo-ci-build-train.yml
@@ -7,8 +7,7 @@ trigger:
     exclude:
     - docs/
     - environment_setup/
-    - charts/
-    - ml_service/util/create_scoring_image.py
+    - ml_service/util/create_scoring_image.*
 
 variables:
 - template: azdo-variables.yml
@@ -27,12 +26,15 @@ stages:
     timeoutInMinutes: 0
     steps:
     - template: azdo-base-pipeline.yml
-    - script: |
-        # Invoke the Python building and publishing a training pipeline
-        python3 $(Build.SourcesDirectory)/ml_service/pipelines/${{ variables.BUILD_TRAIN_SCRIPT }}
-      failOnStderr: 'false'
-      env:
-        SP_APP_SECRET: '$(SP_APP_SECRET)'
+    - task: AzureCLI@1
+      inputs:
+        azureSubscription: '$(WORKSPACE_SVC_CONNECTION)'
+        scriptLocation: inlineScript
+        inlineScript: |
+          set -e # fail on error
+          export SUBSCRIPTION_ID=$(az account show --query id -o tsv)
+          # Invoke the Python building and publishing a training pipeline
+          python $(Build.SourcesDirectory)/ml_service/pipelines/${{ variables.BUILD_TRAIN_SCRIPT }}
       displayName: 'Publish Azure Machine Learning Pipeline'
 - stage: 'Trigger_AML_Pipeline'
   displayName: 'Train, evaluate, register model via previously published AML pipeline'
@@ -45,30 +47,42 @@ stages:
     container: mcr.microsoft.com/mlops/python:latest
     timeoutInMinutes: 0
     steps:
-    - script: |
-        python $(Build.SourcesDirectory)/ml_service/pipelines/run_train_pipeline.py
-        # Set AMLPIPELINEID variable for next AML Pipeline task in next job
-        source $(Build.SourcesDirectory)/tmp.sh
-        echo "##vso[task.setvariable variable=AMLPIPELINEID;isOutput=true]$AMLPIPELINE_ID"
-        rm $(Build.SourcesDirectory)/tmp.sh
+    - task: AzureCLI@1
+      inputs:
+        azureSubscription: '$(WORKSPACE_SVC_CONNECTION)'
+        scriptLocation: inlineScript
+        inlineScript: |
+          set -e # fail on error
+          export SUBSCRIPTION_ID=$(az account show --query id -o tsv)
+          python $(Build.SourcesDirectory)/ml_service/pipelines/run_train_pipeline.py --output_pipeline_id_file "pipeline_id.txt" --skip_train_execution
+          # Set AMLPIPELINEID variable for next AML Pipeline task in next job
+          echo "##vso[task.setvariable variable=AMLPIPELINEID;isOutput=true]$(cat pipeline_id.txt)"
       name: 'getpipelineid'
       displayName: 'Get Pipeline ID'
-      env:
-        SP_APP_SECRET: '$(SP_APP_SECRET)'
+    - bash: |
+          # Generate a hyperparameter value as a random number between 0 and 1.
+          # A random value is used here to make the Azure ML dashboards "interesting" when testing
+          # the solution sample.
+          alpha=$(printf "0.%03d\n" $((($RANDOM*1000)/32767)))
+          echo "Alpha: $alpha"
+          echo "##vso[task.setvariable variable=ALPHA;isOutput=true]$alpha"
+      name: 'getalpha'
+      displayName: 'Generate random value for hyperparameter alpha'
   - job: "Run_ML_Pipeline"
     dependsOn: "Get_Pipeline_ID"
     displayName: "Trigger ML Training Pipeline"
     pool: server
     variables:
       AMLPIPELINE_ID: $[ dependencies.Get_Pipeline_ID.outputs['getpipelineid.AMLPIPELINEID'] ]
+      ALPHA: $[ dependencies.Get_Pipeline_ID.outputs['getalpha.ALPHA'] ]
     steps:
     - task: ms-air-aiagility.vss-services-azureml.azureml-restApi-task.MLPublishedPipelineRestAPITask@0
       displayName: 'Invoke ML pipeline'
       inputs:
         azureSubscription: '$(WORKSPACE_SVC_CONNECTION)'
         PipelineId: '$(AMLPIPELINE_ID)'
         ExperimentName: '$(EXPERIMENT_NAME)'
-        PipelineParameters: '"model_name": "sklearn_regression_model.pkl"'
+        PipelineParameters: '"ParameterAssignments": {"model_name": "$(MODEL_NAME)", "hyperparameter_alpha": "$(ALPHA)"}'
   - job: "Training_Run_Report"
     dependsOn: "Run_ML_Pipeline"
     displayName: "Determine if evaluation succeeded and new model is registered"
@@ -77,22 +91,28 @@ stages:
     container: mcr.microsoft.com/mlops/python:latest
     timeoutInMinutes: 0
     steps:
-    - script: |
-        python $(Build.SourcesDirectory)/code/register/register_model.py --build_id $(Build.BuildId) --validate True
-      displayName: 'Check if new model registered'
-      env:
-        SP_APP_SECRET: '$(SP_APP_SECRET)'
+    - task: AzureCLI@1
+      inputs:
+        azureSubscription: '$(WORKSPACE_SVC_CONNECTION)'
+        scriptLocation: inlineScript
+        inlineScript: |
+          set -e # fail on error
+          export SUBSCRIPTION_ID=$(az account show --query id -o tsv)
+          python $(Build.SourcesDirectory)/ml_service/pipelines/verify_train_pipeline.py --build_id $(Build.BuildId) --model_name "$(MODEL_NAME)" --output_model_version_file "model_version.txt"
+          echo "##vso[task.setvariable variable=MODEL_VERSION;isOutput=true]$(cat model_version.txt)"
+      displayName: "Determine if evaluation succeeded and new model is registered"
     - task: CopyFiles@2
       displayName: 'Copy Files to: $(Build.ArtifactStagingDirectory)'
       inputs:
         SourceFolder: '$(Build.SourcesDirectory)'
         TargetFolder: '$(Build.ArtifactStagingDirectory)'
         Contents: |
           code/scoring/**
+          ml_service/util/**
     - task: PublishBuildArtifacts@1
       displayName: 'Publish Artifact'
       inputs:
         ArtifactName: 'mlops-pipelines'
         publishLocation: 'container'
         pathtoPublish: '$(Build.ArtifactStagingDirectory)' 
-        TargetPath: '$(Build.ArtifactStagingDirectory)'
+        TargetPath: '$(Build.ArtifactStagingDirectory)'
diff --git a/code/evaluate/evaluate_model.py b/code/evaluate/evaluate_model.py
@@ -27,7 +27,6 @@
 import sys
 from azureml.core import Run, Workspace, Experiment
 import argparse
-from azureml.core.authentication import ServicePrincipalAuthentication
 import traceback
 
 run = Run.get_context()
@@ -48,16 +47,11 @@
     build_id = os.environ.get('BUILD_BUILDID')
     # run_id useful to query previous runs
     run_id = "57fee47f-5ae8-441c-bc0c-d4c371f32d70"
-    service_principal = ServicePrincipalAuthentication(
-        tenant_id=tenant_id,
-        service_principal_id=app_id,
-        service_principal_password=app_secret)
 
     aml_workspace = Workspace.get(
         name=workspace_name,
         subscription_id=subscription_id,
-        resource_group=resource_group,
-        auth=service_principal
+        resource_group=resource_group
     )
     ws = aml_workspace
     exp = Experiment(ws, experiment_name)

diff --git a/code/register/register_model.py b/code/register/register_model.py
@@ -29,45 +29,30 @@
 import traceback
 from azureml.core import Run, Experiment, Workspace
 from azureml.core.model import Model as AMLModel
-from azureml.core.authentication import ServicePrincipalAuthentication
 
 
 def main():
 
     run = Run.get_context()
     if (run.id.startswith('OfflineRun')):
         from dotenv import load_dotenv
-        sys.path.append(os.path.abspath("./code/util"))  # NOQA: E402
-        from model_helper import get_model_by_tag
         # For local development, set values in this section
         load_dotenv()
         workspace_name = os.environ.get("WORKSPACE_NAME")
         experiment_name = os.environ.get("EXPERIMENT_NAME")
         resource_group = os.environ.get("RESOURCE_GROUP")
         subscription_id = os.environ.get("SUBSCRIPTION_ID")
-        tenant_id = os.environ.get("TENANT_ID")
-        model_name = os.environ.get("MODEL_NAME")
-        app_id = os.environ.get('SP_APP_ID')
-        app_secret = os.environ.get('SP_APP_SECRET')
         build_id = os.environ.get('BUILD_BUILDID')
         # run_id useful to query previous runs
         run_id = "bd184a18-2ac8-4951-8e78-e290bef3b012"
-        service_principal = ServicePrincipalAuthentication(
-            tenant_id=tenant_id,
-            service_principal_id=app_id,
-            service_principal_password=app_secret)
-
         aml_workspace = Workspace.get(
             name=workspace_name,
             subscription_id=subscription_id,
-            resource_group=resource_group,
-            auth=service_principal
+            resource_group=resource_group
         )
         ws = aml_workspace
         exp = Experiment(ws, experiment_name)
     else:
-        sys.path.append(os.path.abspath("./util"))  # NOQA: E402
-        from model_helper import get_model_by_tag
         ws = run.experiment.workspace
         exp = run.experiment
         run_id = 'amlcompute'
@@ -89,12 +74,6 @@ def main():
         help="Name of the Model",
         default="sklearn_regression_model.pkl",
     )
-    parser.add_argument(
-        "--validate",
-        type=str,
-        help="Set to true to only validate if model is registered for run",
-        default=False,
-    )
 
     args = parser.parse_args()
     if (args.build_id is not None):
@@ -103,30 +82,13 @@ def main():
         run_id = args.run_id
     if (run_id == 'amlcompute'):
         run_id = run.parent.id
-    if (args.validate is not None):
-        validate = args.validate
     model_name = args.model_name
 
-    if (validate):
-        try:
-            tag_name = 'BuildId'
-            model = get_model_by_tag(
-                model_name, tag_name, build_id, exp.workspace)
-            if (model is not None):
-                print("Model was registered for this build.")
-            if (model is None):
-                print("Model was not registered for this run.")
-                sys.exit(1)
-        except Exception as e:
-            print(e)
-            print("Model was not registered for this run.")
-            sys.exit(1)
+    if (build_id is None):
+        register_aml_model(model_name, exp, run_id)
     else:
-        if (build_id is None):
-            register_aml_model(model_name, exp, run_id)
-        else:
-            run.tag("BuildId", value=build_id)
-            register_aml_model(model_name, exp, run_id, build_id)
+        run.tag("BuildId", value=build_id)
+        register_aml_model(model_name, exp, run_id, build_id)
 
 
 def model_already_registered(model_name, exp, run_id):

diff --git a/code/scoring/score.py b/code/scoring/score.py
@@ -39,14 +39,10 @@ def init():
 
 
 def run(raw_data):
-    try:
-        data = json.loads(raw_data)["data"]
-        data = numpy.array(data)
-        result = model.predict(data)
-        return json.dumps({"result": result.tolist()})
-    except Exception as e:
-        result = str(e)
-        return json.dumps({"error": result})
+    data = json.loads(raw_data)["data"]
+    data = numpy.array(data)
+    result = model.predict(data)
+    return {"result": result.tolist()}
 
 
 if __name__ == "__main__":