diff --git a/.gitignore b/.gitignore index 7bac8768..e0a583ec 100644 --- a/.gitignore +++ b/.gitignore @@ -47,6 +47,8 @@ coverage.xml *.cover .hypothesis/ .pytest_cache/ +*-testresults.xml +test-output.xml # Translations *.mo diff --git a/.pipelines/azdo-base-pipeline.yml b/.pipelines/azdo-base-pipeline.yml index c45a7d23..ad0e4ad8 100644 --- a/.pipelines/azdo-base-pipeline.yml +++ b/.pipelines/azdo-base-pipeline.yml @@ -1,26 +1,21 @@ -# this pipeline should be ignored for now -parameters: - pipelineType: 'training' - steps: - script: | - flake8 --output-file=$(Build.BinariesDirectory)/lint-testresults.xml --format junit-xml - workingDirectory: '$(Build.SourcesDirectory)' - displayName: 'Run code quality tests' - enabled: 'true' - -- script: | - python -m pytest --junitxml=$(Build.BinariesDirectory)/unit-testresults.xml $(Build.SourcesDirectory)/tests/unit - displayName: 'Run unit tests' - enabled: 'true' - env: - SP_APP_SECRET: '$(SP_APP_SECRET)' + ./lint-and-test.sh + displayName: 'Linting & unit tests' - task: PublishTestResults@2 condition: succeededOrFailed() inputs: - testResultsFiles: '$(Build.BinariesDirectory)/*-testresults.xml' + testResultsFiles: '*-testresults.xml' testRunTitle: 'Linting & Unit tests' failTaskOnFailedTests: true - displayName: 'Publish linting and unit test results' - enabled: 'true' + displayName: 'Publish test results' + +- task: PublishCodeCoverageResults@1 + displayName: 'Publish coverage report' + condition: succeededOrFailed() + inputs: + codeCoverageTool: Cobertura + summaryFileLocation: 'coverage.xml' + reportDirectory: 'htmlcov' + failIfCoverageEmpty: true diff --git a/.pipelines/azdo-pr-build-train.yml b/.pipelines/azdo-pr-build-train.yml index 76337ab5..a9d3ce74 100644 --- a/.pipelines/azdo-pr-build-train.yml +++ b/.pipelines/azdo-pr-build-train.yml @@ -4,16 +4,14 @@ pr: include: - master -pool: +pool: vmImage: 'ubuntu-latest' container: mcr.microsoft.com/mlops/python:latest - variables: - template: 
diabetes_regression-variables.yml - group: devopsforai-aml-vg - steps: - template: azdo-base-pipeline.yml \ No newline at end of file diff --git a/.pipelines/diabetes_regression-ci-build-train.yml b/.pipelines/diabetes_regression-ci-build-train.yml index bcc1249c..b89eb30c 100644 --- a/.pipelines/diabetes_regression-ci-build-train.yml +++ b/.pipelines/diabetes_regression-ci-build-train.yml @@ -14,6 +14,8 @@ variables: - template: diabetes_regression-variables.yml - group: devopsforai-aml-vg +pool: + vmImage: ubuntu-latest stages: - stage: 'Model_CI' @@ -21,8 +23,6 @@ stages: jobs: - job: "Model_CI_Pipeline" displayName: "Model CI Pipeline" - pool: - vmImage: 'ubuntu-latest' container: mcr.microsoft.com/mlops/python:latest timeoutInMinutes: 0 steps: @@ -45,8 +45,6 @@ stages: - job: "Get_Pipeline_ID" condition: and(succeeded(), eq(coalesce(variables['auto-trigger-training'], 'true'), 'true')) displayName: "Get Pipeline ID for execution" - pool: - vmImage: 'ubuntu-latest' container: mcr.microsoft.com/mlops/python:latest timeoutInMinutes: 0 steps: @@ -92,8 +90,6 @@ stages: dependsOn: "Run_ML_Pipeline" condition: always() displayName: "Determine if evaluation succeeded and new model is registered" - pool: - vmImage: 'ubuntu-latest' container: mcr.microsoft.com/mlops/python:latest timeoutInMinutes: 0 steps: @@ -105,8 +101,6 @@ stages: jobs: - job: "Deploy_ACI" displayName: "Deploy to ACI" - pool: - vmImage: 'ubuntu-latest' container: mcr.microsoft.com/mlops/python:latest timeoutInMinutes: 0 steps: @@ -140,8 +134,6 @@ stages: jobs: - job: "Deploy_AKS" displayName: "Deploy to AKS" - pool: - vmImage: 'ubuntu-latest' container: mcr.microsoft.com/mlops/python:latest timeoutInMinutes: 0 steps: @@ -176,8 +168,6 @@ stages: jobs: - job: "Deploy_Webapp" displayName: "Deploy to Webapp" - pool: - vmImage: 'ubuntu-latest' container: mcr.microsoft.com/mlops/python:latest timeoutInMinutes: 0 steps: diff --git a/README.md b/README.md index c0416cff..ca441c71 100644 --- a/README.md +++ 
b/README.md @@ -55,7 +55,7 @@ This reference architecture shows how to implement continuous integration (CI), Once you have registered your ML model, you can use Azure ML + Azure DevOps to deploy it. -[Azure DevOps release pipeline](https://docs.microsoft.com/en-us/azure/devops/pipelines/release/?view=azure-devops) packages the new model along with the scoring file and its python dependencies into a [docker image](https://docs.microsoft.com/en-us/azure/machine-learning/service/concept-azure-machine-learning-architecture#image) and pushes it to [Azure Container Registry](https://docs.microsoft.com/en-us/azure/container-registry/container-registry-intro). This image is used to deploy the model as [web service](https://docs.microsoft.com/en-us/azure/machine-learning/service/concept-azure-machine-learning-architecture#web-service) across QA and Prod environments. The QA environment is running on top of [Azure Container Instances (ACI)](https://azure.microsoft.com/en-us/services/container-instances/) and the Prod environment is built with [Azure Kubernetes Service (AKS)](https://docs.microsoft.com/en-us/azure/aks/intro-kubernetes). +The [Azure DevOps multi-stage pipeline](https://docs.microsoft.com/en-us/azure/devops/pipelines/process/stages?view=azure-devops&tabs=yaml) packages the new model along with the scoring file and its python dependencies into a [docker image](https://docs.microsoft.com/en-us/azure/machine-learning/service/concept-azure-machine-learning-architecture#image) and pushes it to [Azure Container Registry](https://docs.microsoft.com/en-us/azure/container-registry/container-registry-intro). This image is used to deploy the model as [web service](https://docs.microsoft.com/en-us/azure/machine-learning/service/concept-azure-machine-learning-architecture#web-service) across QA and Prod environments. 
The QA environment is running on top of [Azure Container Instances (ACI)](https://azure.microsoft.com/en-us/services/container-instances/) and the Prod environment is built with [Azure Kubernetes Service (AKS)](https://docs.microsoft.com/en-us/azure/aks/intro-kubernetes). ### Repo Details diff --git a/diabetes_regression/scoring/score.py b/diabetes_regression/scoring/score.py index 10227fcc..4427af05 100644 --- a/diabetes_regression/scoring/score.py +++ b/diabetes_regression/scoring/score.py @@ -79,7 +79,7 @@ def run(data, request_headers): request_headers.get("X-Ms-Request-Id", ""), request_headers.get("Traceparent", ""), len(result) - )) + )) return {"result": result.tolist()} diff --git a/diabetes_regression/training/R/train_with_r_on_databricks.py b/diabetes_regression/training/R/train_with_r_on_databricks.py index 1a120bd0..c571d609 100644 --- a/diabetes_regression/training/R/train_with_r_on_databricks.py +++ b/diabetes_regression/training/R/train_with_r_on_databricks.py @@ -11,5 +11,5 @@ args, unknown = parser.parse_known_args() folder = args.AZUREML_SCRIPT_DIRECTORY_NAME -os.system("cd " + "/dbfs/" + folder + - " && Rscript r_train.r && ls -ltr model.rds") +os.system("cd " + "/dbfs/" + folder + + " && Rscript r_train.r && ls -ltr model.rds") diff --git a/docs/development_setup.md b/docs/development_setup.md new file mode 100644 index 00000000..8565ff04 --- /dev/null +++ b/docs/development_setup.md @@ -0,0 +1,44 @@ +## Development environment setup + +### Setup + +Please be aware that the local environment also needs access to the Azure subscription so you have to have Contributor access on the Azure ML Workspace. + +In order to configure the project locally, create a copy of `.env.example` in the root directory and name it `.env`. Fill out all missing values and adjust the existing ones to suit your requirements. + +### Installation + +[Install the Azure CLI](https://docs.microsoft.com/en-us/cli/azure/install-azure-cli). 
The Azure CLI will be used to log you in interactively. + +Create a virtual environment using [venv](https://docs.python.org/3/library/venv.html), [conda](https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html) or [pyenv-virtualenv](https://github.com/pyenv/pyenv-virtualenv). + +Here is an example for setting up and activating a `venv` environment with Python 3: + +``` +python3 -m venv .venv +source .venv/bin/activate +``` + +Install the required Python modules in your virtual environment. + +``` +pip install -r environment_setup/requirements.txt +``` + +### Running local code + +To run your local ML pipeline code on Azure ML, run a command such as the following (in bash, all on one line): + +``` +export BUILD_BUILDID=$(uuidgen); python ml_service/pipelines/build_train_pipeline.py && python ml_service/pipelines/run_train_pipeline.py +``` + +BUILD_BUILDID is a variable used to uniquely identify the ML pipeline between the +`build_train_pipeline.py` and `run_train_pipeline.py` scripts. In Azure DevOps it is +set to the current build number. In a local environment, we can use a command such as +`uuidgen` to set a different random identifier on each run, ensuring there are +no collisions. + +### Local testing + +Before committing, run `tox` to execute linter and unit test checks. diff --git a/docs/getting_started.md b/docs/getting_started.md index 3d372ab3..bbd20506 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -80,10 +80,7 @@ There are more variables used in the project. They're defined in two places, one ### Local configuration -In order to configure the project locally, create a copy of `.env.example` in the root directory and name it `.env`. Fill out all missing values and adjust the existing ones to suit your requirements. - -For local development, you will also need to [install the Azure CLI](https://docs.microsoft.com/en-us/cli/azure/install-azure-cli). 
The Azure CLI will be used to log you in interactively. -Please be aware that the local environment also needs access to the Azure subscription so you have to have Contributor access on the Azure ML Workspace. +For instructions on how to set up a local development environment, refer to the [Development environment setup instructions](development_setup.md). ### Azure DevOps configuration diff --git a/environment_setup/install_requirements.sh b/environment_setup/install_requirements.sh index 1bdd081d..930514a6 100644 --- a/environment_setup/install_requirements.sh +++ b/environment_setup/install_requirements.sh @@ -26,6 +26,4 @@ python --version -pip install azure-cli==2.0.46 -pip install --upgrade azureml-sdk[cli] pip install -r requirements.txt \ No newline at end of file diff --git a/environment_setup/requirements.txt b/environment_setup/requirements.txt index f99e7f4b..babb1ddc 100644 --- a/environment_setup/requirements.txt +++ b/environment_setup/requirements.txt @@ -1,4 +1,5 @@ pytest>=5.3 +pytest-cov>=2.8.1 requests>=2.22 numpy>=1.17 pandas>=0.25 @@ -6,5 +7,6 @@ scikit-learn>=0.21.3 azureml-sdk>=1.0 python-dotenv>=0.10.3 flake8>=3.7 -flake8_formatter_junit_xml +flake8_formatter_junit_xml>=0.0.6 +tox>=3.14.3 azure-cli==2.0.76 diff --git a/lint-and-test.sh b/lint-and-test.sh new file mode 100755 index 00000000..77c646ba --- /dev/null +++ b/lint-and-test.sh @@ -0,0 +1,4 @@ +#!/bin/sh +set -eux +flake8 --output-file=lint-testresults.xml --format junit-xml +python -m pytest tests/unit --cov=diabetes_regression --cov-report=html --cov-report=xml --junitxml=unit-testresults.xml diff --git a/ml_service/util/smoke_test_scoring_service.py b/ml_service/util/smoke_test_scoring_service.py index 58d075aa..0fa34b1e 100644 --- a/ml_service/util/smoke_test_scoring_service.py +++ b/ml_service/util/smoke_test_scoring_service.py @@ -49,7 +49,7 @@ def call_web_app(url, headers): response.raise_for_status() return response.json() except requests.exceptions.HTTPError as e: - if i 
== retries-1: + if i == retries - 1: raise e print(e) print("Retrying...") diff --git a/tests/unit/code_test.py b/tests/unit/code_test.py index 6b6d60ac..e8457f95 100644 --- a/tests/unit/code_test.py +++ b/tests/unit/code_test.py @@ -5,9 +5,9 @@ def test_train_model(): - X_train = np.array([1, 2, 3, 4, 5, 6]).reshape(-1, 1) + X_train = np.array([1, 2, 3, 4, 5, 6]).reshape(-1, 1) y_train = np.array([10, 9, 8, 8, 6, 5]) - X_test = np.array([3, 4]).reshape(-1, 1) + X_test = np.array([3, 4]).reshape(-1, 1) y_test = np.array([8, 7]) data = {"train": {"X": X_train, "y": y_train}, "test": {"X": X_test, "y": y_test}} diff --git a/tests/unit/data_test.py b/tests/unit/data_test.py index 8b40b8bc..4148f029 100644 --- a/tests/unit/data_test.py +++ b/tests/unit/data_test.py @@ -120,8 +120,8 @@ def test_check_distribution(): mean = np.mean(dataset.values, axis=0) std = np.mean(dataset.values, axis=0) assert ( - np.sum(abs(mean - historical_mean) > - shift_tolerance * abs(historical_mean)) - or np.sum(abs(std - historical_std) > - shift_tolerance * abs(historical_std)) > 0 + np.sum(abs(mean - historical_mean) + > shift_tolerance * abs(historical_mean)) + or np.sum(abs(std - historical_std) + > shift_tolerance * abs(historical_std)) > 0 ) diff --git a/tox.ini b/tox.ini new file mode 100644 index 00000000..e24d8ae1 --- /dev/null +++ b/tox.ini @@ -0,0 +1,7 @@ +[flake8] +# ignore obsolete warning +ignore = W503 +exclude = .git,__pycache__,.venv,.tox,**/site-packages/**/*.py,**/lib/**.py,**/bin/**.py + +[pytest] +junit_family = legacy