Tox + code coverage #150

Merged: 16 commits, Jan 31, 2020

Changes from 12 commits

2 changes: 2 additions & 0 deletions .gitignore
@@ -47,6 +47,8 @@ coverage.xml
*.cover
.hypothesis/
.pytest_cache/
*-testresults.xml
test-output.xml

# Translations
*.mo
30 changes: 12 additions & 18 deletions .pipelines/azdo-base-pipeline.yml
@@ -1,26 +1,20 @@
# this pipeline should be ignored for now
parameters:
pipelineType: 'training'

steps:
- script: |
flake8 --output-file=$(Build.BinariesDirectory)/lint-testresults.xml --format junit-xml
workingDirectory: '$(Build.SourcesDirectory)'
displayName: 'Run code quality tests'
enabled: 'true'

- script: |
pytest --junitxml=$(Build.BinariesDirectory)/unit-testresults.xml $(Build.SourcesDirectory)/tests/unit
displayName: 'Run unit tests'
enabled: 'true'
env:
SP_APP_SECRET: '$(SP_APP_SECRET)'
tox
displayName: 'Linting & unit tests'

- task: PublishTestResults@2
condition: succeededOrFailed()
inputs:
testResultsFiles: '$(Build.BinariesDirectory)/*-testresults.xml'
testResultsFiles: '*-testresults.xml'
testRunTitle: 'Linting & Unit tests'
failTaskOnFailedTests: true
displayName: 'Publish linting and unit test results'
enabled: 'true'
displayName: 'Publish test results'

- task: PublishCodeCoverageResults@1
displayName: 'Publish coverage report'
condition: succeededOrFailed()
inputs:
codeCoverageTool: Cobertura
summaryFileLocation: 'coverage.xml'
failIfCoverageEmpty: true
4 changes: 1 addition & 3 deletions .pipelines/azdo-pr-build-train.yml
@@ -4,16 +4,14 @@ pr:
include:
- master

pool:
pool:
vmImage: 'ubuntu-latest'

container: mcr.microsoft.com/mlops/python:latest


variables:
- template: diabetes_regression-variables.yml
- group: devopsforai-aml-vg


steps:
- template: azdo-base-pipeline.yml
2 changes: 1 addition & 1 deletion .pipelines/azdo-template-get-model-version.yml
@@ -6,7 +6,7 @@ steps:
inlineScript: |
set -e # fail on error
export SUBSCRIPTION_ID=$(az account show --query id -o tsv)
python $(Build.SourcesDirectory)/ml_service/pipelines/verify_train_pipeline.py --build_id $(Build.BuildId) --output_model_version_file "model_version.txt"
python ml_service/pipelines/verify_train_pipeline.py --build_id $(Build.BuildId) --output_model_version_file "model_version.txt"
# Output model version to Azure DevOps job
MODEL_VERSION="$(cat model_version.txt)"
echo "##vso[task.setvariable variable=MODEL_VERSION]$MODEL_VERSION"
18 changes: 4 additions & 14 deletions .pipelines/diabetes_regression-ci-build-train.yml
@@ -14,15 +14,15 @@ variables:
- template: diabetes_regression-variables.yml
- group: devopsforai-aml-vg

pool:
vmImage: ubuntu-latest

stages:
- stage: 'Model_CI'
displayName: 'Model CI'
jobs:
- job: "Model_CI_Pipeline"
displayName: "Model CI Pipeline"
pool:
vmImage: 'ubuntu-latest'
container: mcr.microsoft.com/mlops/python:latest
timeoutInMinutes: 0
steps:
@@ -35,7 +35,7 @@
set -e # fail on error
export SUBSCRIPTION_ID=$(az account show --query id -o tsv)
# Invoke the Python building and publishing a training pipeline
python $(Build.SourcesDirectory)/ml_service/pipelines/${{ variables.BUILD_TRAIN_SCRIPT }}
python ml_service/pipelines/${{ variables.BUILD_TRAIN_SCRIPT }}
displayName: 'Publish Azure Machine Learning Pipeline'

- stage: 'Trigger_AML_Pipeline'
@@ -44,8 +44,6 @@
- job: "Get_Pipeline_ID"
condition: and(succeeded(), eq(coalesce(variables['auto-trigger-training'], 'true'), 'true'))
displayName: "Get Pipeline ID for execution"
pool:
vmImage: 'ubuntu-latest'
container: mcr.microsoft.com/mlops/python:latest
timeoutInMinutes: 0
steps:
@@ -56,7 +54,7 @@
inlineScript: |
set -e # fail on error
export SUBSCRIPTION_ID=$(az account show --query id -o tsv)
python $(Build.SourcesDirectory)/ml_service/pipelines/run_train_pipeline.py --output_pipeline_id_file "pipeline_id.txt" --skip_train_execution
python ml_service/pipelines/run_train_pipeline.py --output_pipeline_id_file "pipeline_id.txt" --skip_train_execution
# Set AMLPIPELINEID variable for next AML Pipeline task in next job
AMLPIPELINEID="$(cat pipeline_id.txt)"
echo "##vso[task.setvariable variable=AMLPIPELINEID;isOutput=true]$AMLPIPELINEID"
@@ -90,8 +88,6 @@ stages:
dependsOn: "Run_ML_Pipeline"
condition: always()
displayName: "Determine if evaluation succeeded and new model is registered"
pool:
vmImage: 'ubuntu-latest'
container: mcr.microsoft.com/mlops/python:latest
timeoutInMinutes: 0
steps:
@@ -103,8 +99,6 @@
jobs:
- job: "Deploy_ACI"
displayName: "Deploy to ACI"
pool:
vmImage: 'ubuntu-latest'
container: mcr.microsoft.com/mlops/python:latest
timeoutInMinutes: 0
steps:
@@ -138,8 +132,6 @@
jobs:
- job: "Deploy_AKS"
displayName: "Deploy to AKS"
pool:
vmImage: 'ubuntu-latest'
container: mcr.microsoft.com/mlops/python:latest
timeoutInMinutes: 0
steps:
@@ -174,8 +166,6 @@
jobs:
- job: "Deploy_Webapp"
displayName: "Deploy to Webapp"
pool:
vmImage: 'ubuntu-latest'
container: mcr.microsoft.com/mlops/python:latest
timeoutInMinutes: 0
steps:
2 changes: 1 addition & 1 deletion README.md
@@ -55,7 +55,7 @@ This reference architecture shows how to implement continuous integration (CI),

Once you have registered your ML model, you can use Azure ML + Azure DevOps to deploy it.

[Azure DevOps release pipeline](https://docs.microsoft.com/en-us/azure/devops/pipelines/release/?view=azure-devops) packages the new model along with the scoring file and its python dependencies into a [docker image](https://docs.microsoft.com/en-us/azure/machine-learning/service/concept-azure-machine-learning-architecture#image) and pushes it to [Azure Container Registry](https://docs.microsoft.com/en-us/azure/container-registry/container-registry-intro). This image is used to deploy the model as [web service](https://docs.microsoft.com/en-us/azure/machine-learning/service/concept-azure-machine-learning-architecture#web-service) across QA and Prod environments. The QA environment is running on top of [Azure Container Instances (ACI)](https://azure.microsoft.com/en-us/services/container-instances/) and the Prod environment is built with [Azure Kubernetes Service (AKS)](https://docs.microsoft.com/en-us/azure/aks/intro-kubernetes).
The [Azure DevOps multi-stage pipeline](https://docs.microsoft.com/en-us/azure/devops/pipelines/process/stages?view=azure-devops&tabs=yaml) packages the new model along with the scoring file and its python dependencies into a [docker image](https://docs.microsoft.com/en-us/azure/machine-learning/service/concept-azure-machine-learning-architecture#image) and pushes it to [Azure Container Registry](https://docs.microsoft.com/en-us/azure/container-registry/container-registry-intro). This image is used to deploy the model as [web service](https://docs.microsoft.com/en-us/azure/machine-learning/service/concept-azure-machine-learning-architecture#web-service) across QA and Prod environments. The QA environment is running on top of [Azure Container Instances (ACI)](https://azure.microsoft.com/en-us/services/container-instances/) and the Prod environment is built with [Azure Kubernetes Service (AKS)](https://docs.microsoft.com/en-us/azure/aks/intro-kubernetes).


### Repo Details
2 changes: 1 addition & 1 deletion diabetes_regression/scoring/score.py
@@ -79,7 +79,7 @@ def run(data, request_headers):
request_headers.get("X-Ms-Request-Id", ""),
request_headers.get("Traceparent", ""),
len(result)
))
))

return {"result": result.tolist()}

4 changes: 2 additions & 2 deletions diabetes_regression/training/R/train_with_r_on_databricks.py
@@ -11,5 +11,5 @@
args, unknown = parser.parse_known_args()
folder = args.AZUREML_SCRIPT_DIRECTORY_NAME

os.system("cd " + "/dbfs/" + folder +
" && Rscript r_train.r && ls -ltr model.rds")
os.system("cd " + "/dbfs/" + folder
+ " && Rscript r_train.r && ls -ltr model.rds")
44 changes: 44 additions & 0 deletions docs/development_setup.md
@@ -0,0 +1,44 @@
## Development environment setup

### Setup

Please be aware that the local environment also needs access to the Azure subscription, so you must have Contributor access on the Azure ML Workspace.

In order to configure the project locally, create a copy of `.env.example` in the root directory and name it `.env`. Fill out all missing values and adjust the existing ones to suit your requirements.
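
For illustration only, a `.env` file is a list of `KEY=value` lines. The variable names below are hypothetical placeholders; the authoritative list is whatever `.env.example` contains:

```
# Hypothetical placeholders; copy the real variable names from .env.example
SUBSCRIPTION_ID=<your-azure-subscription-id>
RESOURCE_GROUP=<your-resource-group>
WORKSPACE_NAME=<your-aml-workspace>
```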

### Installation

[Install the Azure CLI](https://docs.microsoft.com/en-us/cli/azure/install-azure-cli). The Azure CLI will be used to log you in interactively.

Create a virtual environment using [venv](https://docs.python.org/3/library/venv.html), [conda](https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html) or [pyenv-virtualenv](https://github.com/pyenv/pyenv-virtualenv).

Here is an example for setting up and activating a `venv` environment with Python 3:

```
python3 -m venv .venv
source .venv/bin/activate
```

Install the required Python modules in your virtual environment.

```
pip install -r environment_setup/requirements.txt
```

### Running local code

To run your local ML pipeline code on Azure ML, run a command such as the following (in bash, all on one line):

```
export BUILD_BUILDID=$(uuidgen); python ml_service/pipelines/build_train_pipeline.py && python ml_service/pipelines/run_train_pipeline.py
```

BUILD_BUILDID is a variable used to uniquely identify the ML pipeline between the
`build_train_pipeline.py` and `run_train_pipeline.py` scripts. In Azure DevOps it is
set to the current build number. In a local environment, we can use a command such as
`uuidgen` to set a different random identifier on each run, ensuring there are
no collisions.
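
For instance, the one-liner above is roughly equivalent to running the two scripts separately with an explicit identifier (the value below is arbitrary and purely illustrative):

```
# Any unique string works as the identifier; a fixed value is shown for illustration
export BUILD_BUILDID=local-run-001
python ml_service/pipelines/build_train_pipeline.py
python ml_service/pipelines/run_train_pipeline.py
```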

### Local testing

Before committing, run `tox` to execute linter and unit test checks.
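
For example, assuming `tox` has already been installed into your virtual environment via `environment_setup/requirements.txt`:

```
# Run the checks defined in tox.ini (flake8 linting plus pytest with coverage)
tox

# Recreate the tox environment if dependencies have changed
tox --recreate
```
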
5 changes: 1 addition & 4 deletions docs/getting_started.md
@@ -80,10 +80,7 @@ There are more variables used in the project. They're defined in two places, one

### Local configuration

In order to configure the project locally, create a copy of `.env.example` in the root directory and name it `.env`. Fill out all missing values and adjust the existing ones to suit your requirements.

For local development, you will also need to [install the Azure CLI](https://docs.microsoft.com/en-us/cli/azure/install-azure-cli). The Azure CLI will be used to log you in interactively.
Please be aware that the local environment also needs access to the Azure subscription so you have to have Contributor access on the Azure ML Workspace.
For instructions on how to set up a local development environment, refer to the [Development environment setup instructions](development_setup.md).

### Azure DevOps configuration

21 changes: 15 additions & 6 deletions environment_setup/Dockerfile
@@ -4,11 +4,20 @@ LABEL org.label-schema.vendor = "Microsoft" \
org.label-schema.url = "https://hub.docker.com/r/microsoft/mlopspython" \
org.label-schema.vcs-url = "https://github.com/microsoft/MLOpsPython"


# Utilities for package installation
RUN apt-get update && apt-get install -y \
curl \
apt-transport-https \
gcc

COPY environment_setup/requirements.txt /setup/

RUN apt-get update && apt-get install gcc -y && pip install --upgrade -r /setup/requirements.txt && \
conda install -c r r-essentials
# Install dotnet runtime used to generate code coverage report
RUN curl -O https://packages.microsoft.com/config/ubuntu/19.04/packages-microsoft-prod.deb \
&& dpkg -i packages-microsoft-prod.deb \
&& apt-get update && apt-get install -y dotnet-runtime-3.1
Member:

Do we really need dotnet-runtime-3.1? I assume code coverage will still show up in Azure DevOps, but we'll just get the warning of no auto-generated html reports, no?

Contributor Author:

Without dotnet runtime we only get the report as an artifact.

With dotnet runtime we get an embedded HTML report:

(screenshot: embedded HTML coverage report)

This can be achieved by:

  1. Adding the dotnet runtime to the mlopspython container, which is an additional 104 MB installation.
  2. Installing dotnet at pipeline execution time.

Given the impact of 1 or 2 in terms of build time and the limited benefit of the embedded report, I will update the PR to go for neither option.

Member:
Another idea would be to add option 2, commented out, in the pipeline in case people want to enable the code coverage HTML report.

Contributor Author:
OK. Note that we will see this warning:

(screenshot of the warning)

Member:
👌

Contributor Author:
Fixed. There is no update to the Dockerfile anymore, but the mlopspython image must still be rebuilt because of updates to requirements.txt.
I tested installing dotnet core but coverage report generation then fails with a missing libicu library, and sudo is not available to install it. I suggest investigating this at a later point.


# Packages for running R
RUN conda install -c r r-essentials

CMD ["python"]
# Install Python modules
COPY environment_setup/requirements.txt /setup/
RUN pip install --upgrade -r /setup/requirements.txt
Contributor:
Consider reducing the number of image layers by combining RUN commands.
"squash" option is still experimental.

Contributor Author:
Updated to reduce RUN steps

2 changes: 0 additions & 2 deletions environment_setup/install_requirements.sh
@@ -26,6 +26,4 @@


python --version
pip install azure-cli==2.0.46
pip install --upgrade azureml-sdk[cli]
pip install -r requirements.txt
4 changes: 3 additions & 1 deletion environment_setup/requirements.txt
@@ -1,10 +1,12 @@
pytest>=5.3
pytest-cov>=2.8.1
requests>=2.22
numpy>=1.17
pandas>=0.25
scikit-learn>=0.21.3
azureml-sdk>=1.0
python-dotenv>=0.10.3
flake8>=3.7
flake8_formatter_junit_xml
flake8_formatter_junit_xml>=0.0.6
tox>=3.14.3
azure-cli==2.0.76
2 changes: 1 addition & 1 deletion ml_service/util/smoke_test_scoring_service.py
@@ -52,7 +52,7 @@ def call_web_app(url, headers):
response.raise_for_status()
return response.json()
except requests.exceptions.HTTPError as e:
if i == retries-1:
if i == retries - 1:
raise e
print(e)
print("Retrying...")
4 changes: 2 additions & 2 deletions tests/unit/code_test.py
@@ -9,9 +9,9 @@


def test_train_model():
X_train = np.array([1, 2, 3, 4, 5, 6]).reshape(-1, 1)
X_train = np.array([1, 2, 3, 4, 5, 6]).reshape(-1, 1)
y_train = np.array([10, 9, 8, 8, 6, 5])
X_test = np.array([3, 4]).reshape(-1, 1)
X_test = np.array([3, 4]).reshape(-1, 1)
y_test = np.array([8, 7])
data = {"train": {"X": X_train, "y": y_train},
"test": {"X": X_test, "y": y_test}}
8 changes: 4 additions & 4 deletions tests/unit/data_test.py
@@ -120,8 +120,8 @@ def test_check_distribution():
mean = np.mean(dataset.values, axis=0)
std = np.mean(dataset.values, axis=0)
assert (
np.sum(abs(mean - historical_mean) >
shift_tolerance * abs(historical_mean))
or np.sum(abs(std - historical_std) >
shift_tolerance * abs(historical_std)) > 0
np.sum(abs(mean - historical_mean)
> shift_tolerance * abs(historical_mean))
or np.sum(abs(std - historical_std)
> shift_tolerance * abs(historical_std)) > 0
)
20 changes: 20 additions & 0 deletions tox.ini
@@ -0,0 +1,20 @@
[tox]
minversion = 3.14.3
skipsdist = True

[flake8]
# ignore obsolete warning
ignore = W503
exclude = .git,__pycache__,.venv,.tox,**/site-packages/**/*.py,**/lib/**.py,**/bin/**.py

[pytest]
junit_family = legacy

[testenv]
whitelist_externals =
flake8
pytest
passenv = HOME
commands =
flake8 --output-file=lint-testresults.xml --format junit-xml
pytest tests/unit --cov=diabetes_regression --cov-report=xml --junitxml=unit-testresults.xml