Tox + code coverage (#150)

microsoft · Jan 31, 2020 · e0e6d22 · e0e6d22
1 parent 57ef170
commit e0e6d22
Show file tree

Hide file tree

Showing 16 changed files with 88 additions and 51 deletions.
diff --git a/.gitignore b/.gitignore
@@ -47,6 +47,8 @@ coverage.xml
 *.cover
 .hypothesis/
 .pytest_cache/
+*-testresults.xml
+test-output.xml
 
 # Translations
 *.mo

diff --git a/.pipelines/azdo-base-pipeline.yml b/.pipelines/azdo-base-pipeline.yml
@@ -1,26 +1,21 @@
-# this pipeline should be ignored for now
-parameters:
-  pipelineType: 'training'
-
 steps:
 - script: |
-   flake8 --output-file=$(Build.BinariesDirectory)/lint-testresults.xml --format junit-xml  
-  workingDirectory: '$(Build.SourcesDirectory)'
-  displayName: 'Run code quality tests'
-  enabled: 'true'
-
-- script: |
-   python -m pytest --junitxml=$(Build.BinariesDirectory)/unit-testresults.xml $(Build.SourcesDirectory)/tests/unit
-  displayName: 'Run unit tests'
-  enabled: 'true'
-  env:
-    SP_APP_SECRET: '$(SP_APP_SECRET)'
+    ./lint-and-test.sh
+  displayName: 'Linting & unit tests'
 
 - task: PublishTestResults@2
   condition: succeededOrFailed()
   inputs:
-    testResultsFiles: '$(Build.BinariesDirectory)/*-testresults.xml'
+    testResultsFiles: '*-testresults.xml'
     testRunTitle: 'Linting & Unit tests'
     failTaskOnFailedTests: true
-  displayName: 'Publish linting and unit test results'
-  enabled: 'true'
+  displayName: 'Publish test results'
+
+- task: PublishCodeCoverageResults@1
+  displayName: 'Publish coverage report'
+  condition: succeededOrFailed()
+  inputs:
+    codeCoverageTool: Cobertura
+    summaryFileLocation: 'coverage.xml'
+    reportDirectory: 'htmlcov'
+    failIfCoverageEmpty: true
diff --git a/.pipelines/azdo-pr-build-train.yml b/.pipelines/azdo-pr-build-train.yml
@@ -4,16 +4,14 @@ pr:
     include:
     - master
 
-pool: 
+pool:
   vmImage: 'ubuntu-latest'
 
 container: mcr.microsoft.com/mlops/python:latest  
 
-
 variables:
 - template: diabetes_regression-variables.yml
 - group: devopsforai-aml-vg
 
-
 steps:
 - template: azdo-base-pipeline.yml
diff --git a/.pipelines/diabetes_regression-ci-build-train.yml b/.pipelines/diabetes_regression-ci-build-train.yml
@@ -14,15 +14,15 @@ variables:
 - template: diabetes_regression-variables.yml
 - group: devopsforai-aml-vg
 
+pool:
+  vmImage: ubuntu-latest
 
 stages:
 - stage: 'Model_CI'
   displayName: 'Model CI'
   jobs:
   - job: "Model_CI_Pipeline"
     displayName: "Model CI Pipeline"
-    pool:
-      vmImage: 'ubuntu-latest'
     container: mcr.microsoft.com/mlops/python:latest
     timeoutInMinutes: 0
     steps:
@@ -45,8 +45,6 @@ stages:
   - job: "Get_Pipeline_ID"
     condition: and(succeeded(), eq(coalesce(variables['auto-trigger-training'], 'true'), 'true'))
     displayName: "Get Pipeline ID for execution"
-    pool:
-      vmImage: 'ubuntu-latest'
     container: mcr.microsoft.com/mlops/python:latest
     timeoutInMinutes: 0
     steps:
@@ -92,8 +90,6 @@ stages:
     dependsOn: "Run_ML_Pipeline"
     condition: always()
     displayName: "Determine if evaluation succeeded and new model is registered"
-    pool:
-        vmImage: 'ubuntu-latest'
     container: mcr.microsoft.com/mlops/python:latest
     timeoutInMinutes: 0
     steps:
@@ -105,8 +101,6 @@ stages:
   jobs:
   - job: "Deploy_ACI"
     displayName: "Deploy to ACI"
-    pool:
-      vmImage: 'ubuntu-latest'
     container: mcr.microsoft.com/mlops/python:latest
     timeoutInMinutes: 0
     steps:
@@ -140,8 +134,6 @@ stages:
   jobs:
   - job: "Deploy_AKS"
     displayName: "Deploy to AKS"
-    pool:
-      vmImage: 'ubuntu-latest'
     container: mcr.microsoft.com/mlops/python:latest
     timeoutInMinutes: 0
     steps:
@@ -176,8 +168,6 @@ stages:
   jobs:
   - job: "Deploy_Webapp"
     displayName: "Deploy to Webapp"
-    pool:
-      vmImage: 'ubuntu-latest'
     container: mcr.microsoft.com/mlops/python:latest
     timeoutInMinutes: 0
     steps:

diff --git a/README.md b/README.md
@@ -55,7 +55,7 @@ This reference architecture shows how to implement continuous integration (CI),
 
 Once you have registered your ML model, you can use Azure ML + Azure DevOps to deploy it.
 
-[Azure DevOps release pipeline](https://docs.microsoft.com/en-us/azure/devops/pipelines/release/?view=azure-devops) packages the new model along with the scoring file and its python dependencies into a [docker image](https://docs.microsoft.com/en-us/azure/machine-learning/service/concept-azure-machine-learning-architecture#image) and pushes it to [Azure Container Registry](https://docs.microsoft.com/en-us/azure/container-registry/container-registry-intro). This image is used to deploy the model as [web service](https://docs.microsoft.com/en-us/azure/machine-learning/service/concept-azure-machine-learning-architecture#web-service) across QA and Prod environments. The QA environment is running on top of [Azure Container Instances (ACI)](https://azure.microsoft.com/en-us/services/container-instances/) and the Prod environment is built with [Azure Kubernetes Service (AKS)](https://docs.microsoft.com/en-us/azure/aks/intro-kubernetes).
+The [Azure DevOps multi-stage pipeline](https://docs.microsoft.com/en-us/azure/devops/pipelines/process/stages?view=azure-devops&tabs=yaml) packages the new model along with the scoring file and its python dependencies into a [docker image](https://docs.microsoft.com/en-us/azure/machine-learning/service/concept-azure-machine-learning-architecture#image) and pushes it to [Azure Container Registry](https://docs.microsoft.com/en-us/azure/container-registry/container-registry-intro). This image is used to deploy the model as [web service](https://docs.microsoft.com/en-us/azure/machine-learning/service/concept-azure-machine-learning-architecture#web-service) across QA and Prod environments. The QA environment is running on top of [Azure Container Instances (ACI)](https://azure.microsoft.com/en-us/services/container-instances/) and the Prod environment is built with [Azure Kubernetes Service (AKS)](https://docs.microsoft.com/en-us/azure/aks/intro-kubernetes).
 
 
 ### Repo Details

diff --git a/diabetes_regression/scoring/score.py b/diabetes_regression/scoring/score.py
@@ -79,7 +79,7 @@ def run(data, request_headers):
                request_headers.get("X-Ms-Request-Id", ""),
                request_headers.get("Traceparent", ""),
                len(result)
-           ))
+    ))
 
     return {"result": result.tolist()}
 

diff --git a/diabetes_regression/training/R/train_with_r_on_databricks.py b/diabetes_regression/training/R/train_with_r_on_databricks.py
@@ -11,5 +11,5 @@
 args, unknown = parser.parse_known_args()
 folder = args.AZUREML_SCRIPT_DIRECTORY_NAME
 
-os.system("cd " + "/dbfs/" + folder +
-          " && Rscript r_train.r && ls -ltr model.rds")
+os.system("cd " + "/dbfs/" + folder
+          + " && Rscript r_train.r && ls -ltr model.rds")
diff --git a/docs/development_setup.md b/docs/development_setup.md
@@ -0,0 +1,44 @@
+## Development environment setup
+
+### Setup
+
+Please be aware that the local environment also needs access to the Azure subscription, so you must have Contributor access on the Azure ML Workspace.
+
+In order to configure the project locally, create a copy of `.env.example` in the root directory and name it `.env`. Fill out all missing values and adjust the existing ones to suit your requirements. 
+
+### Installation
+
+[Install the Azure CLI](https://docs.microsoft.com/en-us/cli/azure/install-azure-cli). The Azure CLI will be used to log you in interactively.
+
+Create a virtual environment using [venv](https://docs.python.org/3/library/venv.html), [conda](https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html) or [pyenv-virtualenv](https://github.com/pyenv/pyenv-virtualenv). 
+
+Here is an example for setting up and activating a `venv` environment with Python 3:
+
+```
+python3 -mvenv .venv
+source .venv/bin/activate
+```
+
+Install the required Python modules in your virtual environment.
+
+```
+pip install -r environment_setup/requirements.txt 
+```
+
+### Running local code
+
+To run your local ML pipeline code on Azure ML, run a command such as the following (in bash, all on one line):
+
+```
+export BUILD_BUILDID=$(uuidgen); python ml_service/pipelines/build_train_pipeline.py && python ml_service/pipelines/run_train_pipeline.py
+```
+
+BUILD_BUILDID is a variable used to uniquely identify the ML pipeline between the
+`build_train_pipeline.py` and `run_train_pipeline.py` scripts. In Azure DevOps it is
+set to the current build number. In a local environment, we can use a command such as
+`uuidgen` to set a different random identifier on each run, ensuring there are
+no collisions.
+
+### Local testing
+
+Before committing, run `tox` to execute linter and unit test checks.
diff --git a/docs/getting_started.md b/docs/getting_started.md
@@ -80,10 +80,7 @@ There are more variables used in the project. They're defined in two places, one
 
 ### Local configuration
 
-In order to configure the project locally, create a copy of `.env.example` in the root directory and name it `.env`. Fill out all missing values and adjust the existing ones to suit your requirements. 
-
-For local development, you will also need to [install the Azure CLI](https://docs.microsoft.com/en-us/cli/azure/install-azure-cli). The Azure CLI will be used to log you in interactively.
-Please be aware that the local environment also needs access to the Azure subscription so you have to have Contributor access on the Azure ML Workspace.
+For instructions on how to set up a local development environment, refer to the [Development environment setup instructions](development_setup.md).
 
 ### Azure DevOps configuration
 

diff --git a/environment_setup/install_requirements.sh b/environment_setup/install_requirements.sh
@@ -26,6 +26,4 @@
 
 
 python --version
-pip install azure-cli==2.0.46
-pip install --upgrade azureml-sdk[cli]
 pip install -r requirements.txt
diff --git a/environment_setup/requirements.txt b/environment_setup/requirements.txt
@@ -1,10 +1,12 @@
 pytest>=5.3
+pytest-cov>=2.8.1
 requests>=2.22
 numpy>=1.17
 pandas>=0.25
 scikit-learn>=0.21.3
 azureml-sdk>=1.0
 python-dotenv>=0.10.3
 flake8>=3.7
-flake8_formatter_junit_xml
+flake8_formatter_junit_xml>=0.0.6
+tox>=3.14.3
 azure-cli==2.0.76
diff --git a/lint-and-test.sh b/lint-and-test.sh
@@ -0,0 +1,4 @@
+#!/bin/sh
+set -eux
+flake8 --output-file=lint-testresults.xml --format junit-xml
+python -m pytest tests/unit --cov=diabetes_regression --cov-report=html --cov-report=xml --junitxml=unit-testresults.xml
diff --git a/ml_service/util/smoke_test_scoring_service.py b/ml_service/util/smoke_test_scoring_service.py
@@ -49,7 +49,7 @@ def call_web_app(url, headers):
             response.raise_for_status()
             return response.json()
         except requests.exceptions.HTTPError as e:
-            if i == retries-1:
+            if i == retries - 1:
                 raise e
             print(e)
             print("Retrying...")

diff --git a/tests/unit/code_test.py b/tests/unit/code_test.py
@@ -5,9 +5,9 @@
 
 
 def test_train_model():
-    X_train = np.array([1, 2, 3, 4, 5, 6]).reshape(-1,  1)
+    X_train = np.array([1, 2, 3, 4, 5, 6]).reshape(-1, 1)
     y_train = np.array([10, 9, 8, 8, 6, 5])
-    X_test = np.array([3, 4]).reshape(-1,  1)
+    X_test = np.array([3, 4]).reshape(-1, 1)
     y_test = np.array([8, 7])
     data = {"train": {"X": X_train, "y": y_train},
             "test": {"X": X_test, "y": y_test}}

diff --git a/tests/unit/data_test.py b/tests/unit/data_test.py
@@ -120,8 +120,8 @@ def test_check_distribution():
     mean = np.mean(dataset.values, axis=0)
     std = np.mean(dataset.values, axis=0)
     assert (
-        np.sum(abs(mean - historical_mean) >
-               shift_tolerance * abs(historical_mean))
-        or np.sum(abs(std - historical_std) >
-                  shift_tolerance * abs(historical_std)) > 0
+        np.sum(abs(mean - historical_mean)
+               > shift_tolerance * abs(historical_mean))
+        or np.sum(abs(std - historical_std)
+                  > shift_tolerance * abs(historical_std)) > 0
     )
diff --git a/tox.ini b/tox.ini
@@ -0,0 +1,7 @@
+[flake8]
+# ignore obsolete warning
+ignore = W503
+exclude = .git,__pycache__,.venv,.tox,**/site-packages/**/*.py,**/lib/**.py,**/bin/**.py
+
+[pytest]
+junit_family = legacy