From c8406234257e63fd5a28757f1e5ed3b1afc097b1 Mon Sep 17 00:00:00 2001 From: Alexandre Gattiker Date: Wed, 29 Jan 2020 22:55:03 +0100 Subject: [PATCH 01/22] . --- .../scoring/inference_config.yml | 4 +-- ...endencies.yml => scoring_dependencies.yml} | 23 ++++++++--------- .../training/training_dependencies.yml | 17 +++++++++++++ environment_setup/Dockerfile | 17 ++++++++----- environment_setup/ci_environment.yml | 25 +++++++++++++++++++ environment_setup/install_requirements.sh | 9 +++---- environment_setup/requirements.txt | 10 -------- ...iabetes_regression_build_train_pipeline.py | 12 +++------ 8 files changed, 74 insertions(+), 43 deletions(-) rename diabetes_regression/scoring/{conda_dependencies.yml => scoring_dependencies.yml} (74%) create mode 100644 diabetes_regression/training/training_dependencies.yml create mode 100644 environment_setup/ci_environment.yml delete mode 100644 environment_setup/requirements.txt diff --git a/diabetes_regression/scoring/inference_config.yml b/diabetes_regression/scoring/inference_config.yml index 3f65cf33..ca2c29ce 100644 --- a/diabetes_regression/scoring/inference_config.yml +++ b/diabetes_regression/scoring/inference_config.yml @@ -1,9 +1,9 @@ entryScript: score.py runtime: python -condaFile: conda_dependencies.yml +condaFile: scoring_dependencies.yml extraDockerfileSteps: schemaFile: sourceDirectory: enableGpu: False baseImage: -baseImageRegistry: \ No newline at end of file +baseImageRegistry: diff --git a/diabetes_regression/scoring/conda_dependencies.yml b/diabetes_regression/scoring/scoring_dependencies.yml similarity index 74% rename from diabetes_regression/scoring/conda_dependencies.yml rename to diabetes_regression/scoring/scoring_dependencies.yml index c97a2722..6f78c5a6 100644 --- a/diabetes_regression/scoring/conda_dependencies.yml +++ b/diabetes_regression/scoring/scoring_dependencies.yml @@ -14,24 +14,23 @@ # This directive is stored in a comment to preserve the Conda file structure. 
# [AzureMlVersion] = 2 -name: project_environment +name: diabetes_scoring + dependencies: + # The python interpreter version. - # Currently Azure ML Workbench only supports 3.5.2 and later. - python=3.7.5 + # Required by azureml-defaults, installed separately through Conda to # get a prebuilt version and not require build tools for the install. - psutil=5.6 #latest +- numpy +- pandas +- scikit-learn + +- pip - pip: - # Required packages for AzureML execution, history, and data preparation. - - azureml-model-management-sdk==1.0.1b6.post1 - - azureml-sdk==1.0.74 - - scipy==1.3.1 - - scikit-learn==0.22 - - pandas==0.25.3 - - numpy==1.17.3 - - joblib==0.14.0 - - gunicorn==19.9.0 - - flask==1.1.1 + # You must list azureml-defaults as a pip dependency + - azureml-defaults>=1.0.45 - inference-schema[numpy-support] diff --git a/diabetes_regression/training/training_dependencies.yml b/diabetes_regression/training/training_dependencies.yml new file mode 100644 index 00000000..03dcb89a --- /dev/null +++ b/diabetes_regression/training/training_dependencies.yml @@ -0,0 +1,17 @@ +name: diabetes_training + +dependencies: + + # The python interpreter version. 
+- python=3.7.5 + +- numpy +- pandas +- scikit-learn +- tensorflow +- keras + +- pip +- pip: + - azureml-core + - azure-storage-blob diff --git a/environment_setup/Dockerfile b/environment_setup/Dockerfile index 5e7b7581..6585e437 100644 --- a/environment_setup/Dockerfile +++ b/environment_setup/Dockerfile @@ -4,11 +4,16 @@ LABEL org.label-schema.vendor = "Microsoft" \ org.label-schema.url = "https://hub.docker.com/r/microsoft/mlopspython" \ org.label-schema.vcs-url = "https://github.com/microsoft/MLOpsPython" - +COPY diabetes_regression/training/training_dependencies.yml diabetes_regression/scoring/scoring_dependencies.yml environment_setup/ci_environment.yml /setup/ -COPY environment_setup/requirements.txt /setup/ - -RUN apt-get update && apt-get install gcc -y && pip install --upgrade -r /setup/requirements.txt && \ - conda install -c r r-essentials +RUN pip install conda-merge==0.1.5 && \ + cd /setup && conda-merge training_dependencies.yml scoring_dependencies.yml ci_environment.yml > conda_merged.yml && \ + echo "Generated conda environment definition:" && cat conda_merged.yml && \ + conda env create -f conda_merged.yml -CMD ["python"] \ No newline at end of file +# activate environment +ENV PATH /usr/local/envs/ci/bin:$PATH +RUN /bin/bash -c "source activate ci" + +# Install Azure CLI ML extension +RUN az extension add -n azure-cli-ml diff --git a/environment_setup/ci_environment.yml b/environment_setup/ci_environment.yml new file mode 100644 index 00000000..a7aae95f --- /dev/null +++ b/environment_setup/ci_environment.yml @@ -0,0 +1,25 @@ +name: ci + +dependencies: + +- r +- r-essentials +- numpy +- pandas +- scikit-learn + +- pip +- pip: + + # dependencies shared with diabetes_regression/scoring/scoring_dependencies.yml. + # Versions specification must match exactly, or the docker build will fail. + - azureml-sdk==1.0.74 + + # Additional pip dependencies for the CI environment. 
+ - pytest>=5.3 + - requests>=2.22 + - numpy>=1.17 + - python-dotenv>=0.10.3 + - flake8>=3.7 + - flake8_formatter_junit_xml + - azure-cli>=2.0.76 diff --git a/environment_setup/install_requirements.sh b/environment_setup/install_requirements.sh index 1bdd081d..752d2959 100644 --- a/environment_setup/install_requirements.sh +++ b/environment_setup/install_requirements.sh @@ -24,8 +24,7 @@ # ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. - -python --version -pip install azure-cli==2.0.46 -pip install --upgrade azureml-sdk[cli] -pip install -r requirements.txt \ No newline at end of file +set -eux +pip install conda-merge==0.1.5 +conda-merge environment_setup/ci_environment.yml diabetes_regression/scoring/scoring_dependencies.yml diabetes_regression/training/training_dependencies.yml > /tmp/conda_merged.yml +conda env create -f /tmp/conda_merged.yml diff --git a/environment_setup/requirements.txt b/environment_setup/requirements.txt deleted file mode 100644 index f99e7f4b..00000000 --- a/environment_setup/requirements.txt +++ /dev/null @@ -1,10 +0,0 @@ -pytest>=5.3 -requests>=2.22 -numpy>=1.17 -pandas>=0.25 -scikit-learn>=0.21.3 -azureml-sdk>=1.0 -python-dotenv>=0.10.3 -flake8>=3.7 -flake8_formatter_junit_xml -azure-cli==2.0.76 diff --git a/ml_service/pipelines/diabetes_regression_build_train_pipeline.py b/ml_service/pipelines/diabetes_regression_build_train_pipeline.py index 0e963d96..c2440a7b 100644 --- a/ml_service/pipelines/diabetes_regression_build_train_pipeline.py +++ b/ml_service/pipelines/diabetes_regression_build_train_pipeline.py @@ -31,14 +31,10 @@ def main(): print("aml_compute:") print(aml_compute) - run_config = RunConfiguration(conda_dependencies=CondaDependencies.create( - conda_packages=['numpy', 'pandas', - 'scikit-learn', 'tensorflow', 'keras'], - pip_packages=['azure', 'azureml-core', - 'azure-storage', - 'azure-storage-blob', - 'azureml-dataprep']) - ) + # Create a reusable run 
configuration environment + conda_deps_file = "diabetes_regression/training/training_dependencies.yml" + conda_deps = CondaDependencies(conda_deps_file) + run_config = RunConfiguration(conda_dependencies=conda_deps) run_config.environment.docker.enabled = True config_envvar = {} if (e.collection_uri is not None and e.teamproject_name is not None): From c2b953ff16dbb3491e94172d4c71ee3a3327f7c4 Mon Sep 17 00:00:00 2001 From: Alexandre Gattiker Date: Thu, 30 Jan 2020 04:49:19 +0100 Subject: [PATCH 02/22] . --- environment_setup/Dockerfile | 4 ++-- environment_setup/ci_environment.yml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/environment_setup/Dockerfile b/environment_setup/Dockerfile index 6585e437..dfe1bdce 100644 --- a/environment_setup/Dockerfile +++ b/environment_setup/Dockerfile @@ -12,8 +12,8 @@ RUN pip install conda-merge==0.1.5 && \ conda env create -f conda_merged.yml # activate environment -ENV PATH /usr/local/envs/ci/bin:$PATH -RUN /bin/bash -c "source activate ci" +ENV PATH /usr/local/envs/mlopspython_ci/bin:$PATH +RUN /bin/bash -c "source activate mlopspython_ci" # Install Azure CLI ML extension RUN az extension add -n azure-cli-ml diff --git a/environment_setup/ci_environment.yml b/environment_setup/ci_environment.yml index a7aae95f..c8443aec 100644 --- a/environment_setup/ci_environment.yml +++ b/environment_setup/ci_environment.yml @@ -1,4 +1,4 @@ -name: ci +name: mlopspython_ci dependencies: From bceeba6de8151352f953214af38684c24b8a9d49 Mon Sep 17 00:00:00 2001 From: Alexandre Gattiker Date: Thu, 30 Jan 2020 07:04:25 +0100 Subject: [PATCH 03/22] Update code_test.py --- tests/unit/code_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/code_test.py b/tests/unit/code_test.py index c7b10182..0e09a2eb 100644 --- a/tests/unit/code_test.py +++ b/tests/unit/code_test.py @@ -19,7 +19,7 @@ def test_train_model(): run = Mock(Run) reg = train_model(run, data, alpha=1.2) - 
run.log.assert_called_with("mse", 0.029843893480256872, + run.log.assert_called_with("mse", 0.029843893480257067, description='Mean squared error metric') preds = reg.predict([[1], [2]]) From 897b7d40bc34a551d32213180dcfb728147817ee Mon Sep 17 00:00:00 2001 From: Alexandre Gattiker Date: Thu, 30 Jan 2020 11:02:09 +0100 Subject: [PATCH 04/22] . --- docs/code_description.md | 7 ++++--- environment_setup/ci_environment.yml | 5 +++-- environment_setup/install_requirements.sh | 2 +- 3 files changed, 8 insertions(+), 6 deletions(-) mode change 100644 => 100755 environment_setup/install_requirements.sh diff --git a/docs/code_description.md b/docs/code_description.md index 5a1af307..d8db0c0d 100644 --- a/docs/code_description.md +++ b/docs/code_description.md @@ -2,9 +2,9 @@ ### Environment Setup -- `environment_setup/requirements.txt` : It consists of a list of python packages which are needed by the train.py to run successfully on host agent (locally). +- `environment_setup/ci_environment.yml` : Conda environment definition for the CI environment. -- `environment_setup/install_requirements.sh` : This script prepares the python environment i.e. install the Azure ML SDK and the packages specified in requirements.txt +- `environment_setup/install_requirements.sh` : This script prepares a local conda environment, i.e. it installs the Azure ML SDK and the packages specified in the environment definitions. - `environment_setup/iac-*.yml, arm-templates` : Infrastructure as Code piplines to create and delete required resources along with corresponding arm-templates. @@ -32,6 +32,7 @@ - `diabetes_regression/training/train.py` : a training step of an ML training pipeline. - `diabetes_regression/evaluate/evaluate_model.py` : an evaluating step of an ML training pipeline which registers a new trained model if evaluation shows the new model is more performant than the previous one. 
- `diabetes_regression/evaluate/register_model.py` : (LEGACY) registers a new trained model if evaluation shows the new model is more performant than the previous one. +- `diabetes_regression/training/training_dependencies.yml` : contains a list of dependencies required by train.py to be installed in a deployable Docker Image - `diabetes_regression/training/R/r_train.r` : training a model with R basing on a sample dataset (weight_data.csv). - `diabetes_regression/training/R/train_with_r.py` : a python wrapper (ML Pipeline Step) invoking R training script on ML Compute - `diabetes_regression/training/R/train_with_r_on_databricks.py` : a python wrapper (ML Pipeline Step) invoking R training script on Databricks Compute @@ -39,5 +40,5 @@ ### Scoring - `diabetes_regression/scoring/score.py` : a scoring script which is about to be packed into a Docker Image along with a model while being deployed to QA/Prod environment. -- `diabetes_regression/scoring/conda_dependencies.yml` : contains a list of dependencies required by score.py to be installed in a deployable Docker Image +- `diabetes_regression/scoring/scoring_dependencies.yml` : contains a list of dependencies required by score.py to be installed in a deployable Docker Image - `diabetes_regression/scoring/inference_config.yml`, deployment_config_aci.yml, deployment_config_aks.yml : configuration files for the [AML Model Deploy](https://marketplace.visualstudio.com/items?itemName=ms-air-aiagility.private-vss-services-azureml&ssr=false#overview) pipeline task for ACI and AKS deployment targets. diff --git a/environment_setup/ci_environment.yml b/environment_setup/ci_environment.yml index c8443aec..d787ad94 100644 --- a/environment_setup/ci_environment.yml +++ b/environment_setup/ci_environment.yml @@ -11,8 +11,9 @@ dependencies: - pip - pip: - # dependencies shared with diabetes_regression/scoring/scoring_dependencies.yml. - # Versions specification must match exactly, or the docker build will fail. 
+ # dependencies shared with diabetes_regression/scoring/scoring_dependencies.yml + # and/or diabetes_regression/training/training_dependencies.yml. + # If versions are specified, they must match exactly, or the docker build will fail. - azureml-sdk==1.0.74 # Additional pip dependencies for the CI environment. diff --git a/environment_setup/install_requirements.sh b/environment_setup/install_requirements.sh old mode 100644 new mode 100755 index 752d2959..5efc5261 --- a/environment_setup/install_requirements.sh +++ b/environment_setup/install_requirements.sh @@ -26,5 +26,5 @@ set -eux pip install conda-merge==0.1.5 -conda-merge environment_setup/ci_environment.yml diabetes_regression/scoring/scoring_dependencies.yml diabetes_regression/training/training_dependencies.yml > /tmp/conda_merged.yml +conda-merge diabetes_regression/training/training_dependencies.yml diabetes_regression/scoring/scoring_dependencies.yml environment_setup/ci_environment.yml > /tmp/conda_merged.yml conda env create -f /tmp/conda_merged.yml From e304fd2a2533e09b9fdc55049b8da020e91a9d67 Mon Sep 17 00:00:00 2001 From: Alexandre Gattiker Date: Thu, 30 Jan 2020 11:08:57 +0100 Subject: [PATCH 05/22] Update Dockerfile --- environment_setup/Dockerfile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/environment_setup/Dockerfile b/environment_setup/Dockerfile index dfe1bdce..41994ca4 100644 --- a/environment_setup/Dockerfile +++ b/environment_setup/Dockerfile @@ -15,5 +15,8 @@ RUN pip install conda-merge==0.1.5 && \ ENV PATH /usr/local/envs/mlopspython_ci/bin:$PATH RUN /bin/bash -c "source activate mlopspython_ci" -# Install Azure CLI ML extension +# Install Azure CLI ML extension. +# This also serves as workaround for https://github.com/conda/conda/issues/8537 (conda env create doesn't fail +# if pip installation fails, for example due to different version specs in the various environment files). +# The `az` command is not available if pip has not installed azure-cli. 
RUN az extension add -n azure-cli-ml From 41c2499270e88af9eecfe91a245e5a34aa906f75 Mon Sep 17 00:00:00 2001 From: Alexandre Gattiker Date: Fri, 31 Jan 2020 07:00:42 +0100 Subject: [PATCH 06/22] Do not use conda-merge * Move all 3 conda files to a single dir * Do not use conda-merge * Pin package versions --- diabetes_regression/ci_dependencies.yml | 28 +++++++++++++++++++ .../scoring/inference_config.yml | 2 +- .../{scoring => }/scoring_dependencies.yml | 12 ++++---- .../training/training_dependencies.yml | 17 ----------- diabetes_regression/training_dependencies.yml | 17 +++++++++++ environment_setup/Dockerfile | 7 ++--- environment_setup/ci_environment.yml | 26 ----------------- environment_setup/install_requirements.sh | 4 +-- ...iabetes_regression_build_train_pipeline.py | 2 +- 9 files changed, 56 insertions(+), 59 deletions(-) create mode 100644 diabetes_regression/ci_dependencies.yml rename diabetes_regression/{scoring => }/scoring_dependencies.yml (89%) delete mode 100644 diabetes_regression/training/training_dependencies.yml create mode 100644 diabetes_regression/training_dependencies.yml delete mode 100644 environment_setup/ci_environment.yml diff --git a/diabetes_regression/ci_dependencies.yml b/diabetes_regression/ci_dependencies.yml new file mode 100644 index 00000000..a61731c4 --- /dev/null +++ b/diabetes_regression/ci_dependencies.yml @@ -0,0 +1,28 @@ +name: mlopspython_ci + +dependencies: + + # The python interpreter version. +- python=3.7.5 + +- r=3.6.0 +- r-essentials=3.6.0 +- numpy=1.18.1 +- pandas=1.0.0 +- scikit-learn=0.22.1 + +- pip=20.0.2 +- pip: + + # dependencies shared with other environment .yml files. + - azureml-sdk==1.0.79 + + # Additional pip dependencies for the CI environment. 
+ - pytest==5.3.1 + - pytest-cov==2.8.1 + - requests==2.22.0 + - python-dotenv==0.10.3 + - flake8==3.7.9 + - flake8_formatter_junit_xml==0.0.6 + - azure-cli==2.0.77 + - tox==3.14.3 diff --git a/diabetes_regression/scoring/inference_config.yml b/diabetes_regression/scoring/inference_config.yml index ca2c29ce..42947da8 100644 --- a/diabetes_regression/scoring/inference_config.yml +++ b/diabetes_regression/scoring/inference_config.yml @@ -1,6 +1,6 @@ entryScript: score.py runtime: python -condaFile: scoring_dependencies.yml +condaFile: ../scoring_dependencies.yml extraDockerfileSteps: schemaFile: sourceDirectory: diff --git a/diabetes_regression/scoring/scoring_dependencies.yml b/diabetes_regression/scoring_dependencies.yml similarity index 89% rename from diabetes_regression/scoring/scoring_dependencies.yml rename to diabetes_regression/scoring_dependencies.yml index 6f78c5a6..26ce3622 100644 --- a/diabetes_regression/scoring/scoring_dependencies.yml +++ b/diabetes_regression/scoring_dependencies.yml @@ -25,12 +25,12 @@ dependencies: # get a prebuilt version and not require build tools for the install. - psutil=5.6 #latest -- numpy -- pandas -- scikit-learn +- numpy=1.18.1 +- pandas=1.0.0 +- scikit-learn=0.22.1 -- pip +- pip=20.0.2 - pip: # You must list azureml-defaults as a pip dependency - - azureml-defaults>=1.0.45 - - inference-schema[numpy-support] + - azureml-defaults==1.0.85 + - inference-schema[numpy-support]==1.0.1 diff --git a/diabetes_regression/training/training_dependencies.yml b/diabetes_regression/training/training_dependencies.yml deleted file mode 100644 index 03dcb89a..00000000 --- a/diabetes_regression/training/training_dependencies.yml +++ /dev/null @@ -1,17 +0,0 @@ -name: diabetes_training - -dependencies: - - # The python interpreter version. 
-- python=3.7.5 - -- numpy -- pandas -- scikit-learn -- tensorflow -- keras - -- pip -- pip: - - azureml-core - - azure-storage-blob diff --git a/diabetes_regression/training_dependencies.yml b/diabetes_regression/training_dependencies.yml new file mode 100644 index 00000000..9a8bd6cf --- /dev/null +++ b/diabetes_regression/training_dependencies.yml @@ -0,0 +1,17 @@ +name: diabetes_training + +dependencies: + + # The python interpreter version. +- python=3.7.5 + +- numpy=1.18.1 +- pandas=1.0.0 +- scikit-learn=0.22.1 +#- tensorflow +#- keras + +- pip=20.0.2 +- pip: + - azureml-core==1.0.79 + - azure-storage-blob==12.1.0 diff --git a/environment_setup/Dockerfile b/environment_setup/Dockerfile index 41994ca4..5c7f62dc 100644 --- a/environment_setup/Dockerfile +++ b/environment_setup/Dockerfile @@ -4,12 +4,9 @@ LABEL org.label-schema.vendor = "Microsoft" \ org.label-schema.url = "https://hub.docker.com/r/microsoft/mlopspython" \ org.label-schema.vcs-url = "https://github.com/microsoft/MLOpsPython" -COPY diabetes_regression/training/training_dependencies.yml diabetes_regression/scoring/scoring_dependencies.yml environment_setup/ci_environment.yml /setup/ +COPY diabetes_regression/ci_dependencies.yml /setup/ -RUN pip install conda-merge==0.1.5 && \ - cd /setup && conda-merge training_dependencies.yml scoring_dependencies.yml ci_environment.yml > conda_merged.yml && \ - echo "Generated conda environment definition:" && cat conda_merged.yml && \ - conda env create -f conda_merged.yml +RUN conda env create -f /setup/ci_dependencies.yml # activate environment ENV PATH /usr/local/envs/mlopspython_ci/bin:$PATH diff --git a/environment_setup/ci_environment.yml b/environment_setup/ci_environment.yml deleted file mode 100644 index d787ad94..00000000 --- a/environment_setup/ci_environment.yml +++ /dev/null @@ -1,26 +0,0 @@ -name: mlopspython_ci - -dependencies: - -- r -- r-essentials -- numpy -- pandas -- scikit-learn - -- pip -- pip: - - # dependencies shared with 
diabetes_regression/scoring/scoring_dependencies.yml - # and/or diabetes_regression/training/training_dependencies.yml. - # If versions are specified, they must match exactly, or the docker build will fail. - - azureml-sdk==1.0.74 - - # Additional pip dependencies for the CI environment. - - pytest>=5.3 - - requests>=2.22 - - numpy>=1.17 - - python-dotenv>=0.10.3 - - flake8>=3.7 - - flake8_formatter_junit_xml - - azure-cli>=2.0.76 diff --git a/environment_setup/install_requirements.sh b/environment_setup/install_requirements.sh index 5efc5261..9ed59899 100755 --- a/environment_setup/install_requirements.sh +++ b/environment_setup/install_requirements.sh @@ -25,6 +25,4 @@ # POSSIBILITY OF SUCH DAMAGE. set -eux -pip install conda-merge==0.1.5 -conda-merge diabetes_regression/training/training_dependencies.yml diabetes_regression/scoring/scoring_dependencies.yml environment_setup/ci_environment.yml > /tmp/conda_merged.yml -conda env create -f /tmp/conda_merged.yml +conda env create -f diabetes_regression/ci_dependencies.yml diff --git a/ml_service/pipelines/diabetes_regression_build_train_pipeline.py b/ml_service/pipelines/diabetes_regression_build_train_pipeline.py index c2440a7b..b4879c45 100644 --- a/ml_service/pipelines/diabetes_regression_build_train_pipeline.py +++ b/ml_service/pipelines/diabetes_regression_build_train_pipeline.py @@ -32,7 +32,7 @@ def main(): print(aml_compute) # Create a reusable run configuration environment - conda_deps_file = "diabetes_regression/training/training_dependencies.yml" + conda_deps_file = "diabetes_regression/training_dependencies.yml" conda_deps = CondaDependencies(conda_deps_file) run_config = RunConfiguration(conda_dependencies=conda_deps) run_config.environment.docker.enabled = True From 921b65f9e1c7a19f8aa7271db7a2a9b910ebb7cf Mon Sep 17 00:00:00 2001 From: Alexandre Gattiker Date: Fri, 31 Jan 2020 07:35:19 +0100 Subject: [PATCH 07/22] PR review fixes --- .gitignore | 1 + diabetes_regression/training_dependencies.yml | 3 
++- environment_setup/install_requirements.sh | 3 +++ .../diabetes_regression_build_train_pipeline.py | 2 +- ...iabetes_regression_build_train_pipeline_with_r.py | 12 ++++-------- 5 files changed, 11 insertions(+), 10 deletions(-) diff --git a/.gitignore b/.gitignore index 7bac8768..02d3c963 100644 --- a/.gitignore +++ b/.gitignore @@ -91,6 +91,7 @@ ENV/ env.bak/ venv.bak/ *.vscode +condaenv.* # Spyder project settings .spyderproject diff --git a/diabetes_regression/training_dependencies.yml b/diabetes_regression/training_dependencies.yml index 9a8bd6cf..48f398fb 100644 --- a/diabetes_regression/training_dependencies.yml +++ b/diabetes_regression/training_dependencies.yml @@ -8,10 +8,11 @@ dependencies: - numpy=1.18.1 - pandas=1.0.0 - scikit-learn=0.22.1 +#- r +#- r-essentials #- tensorflow #- keras - pip=20.0.2 - pip: - azureml-core==1.0.79 - - azure-storage-blob==12.1.0 diff --git a/environment_setup/install_requirements.sh b/environment_setup/install_requirements.sh index 9ed59899..989e8b1e 100755 --- a/environment_setup/install_requirements.sh +++ b/environment_setup/install_requirements.sh @@ -25,4 +25,7 @@ # POSSIBILITY OF SUCH DAMAGE. 
set -eux + conda env create -f diabetes_regression/ci_dependencies.yml + +conda activate mlopspython_ci diff --git a/ml_service/pipelines/diabetes_regression_build_train_pipeline.py b/ml_service/pipelines/diabetes_regression_build_train_pipeline.py index aeb9a2b9..b127d9ee 100644 --- a/ml_service/pipelines/diabetes_regression_build_train_pipeline.py +++ b/ml_service/pipelines/diabetes_regression_build_train_pipeline.py @@ -28,7 +28,7 @@ def main(): print("aml_compute:") print(aml_compute) - # Create a reusable run configuration environment + # Create a run configuration environment conda_deps_file = "diabetes_regression/training_dependencies.yml" conda_deps = CondaDependencies(conda_deps_file) run_config = RunConfiguration(conda_dependencies=conda_deps) diff --git a/ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py b/ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py index cb47cdf5..4f71625f 100644 --- a/ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py +++ b/ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py @@ -26,15 +26,11 @@ def main(): print("aml_compute:") print(aml_compute) - run_config = RunConfiguration(conda_dependencies=CondaDependencies.create( - conda_packages=['numpy', 'pandas', - 'scikit-learn', 'tensorflow', 'keras'], - pip_packages=['azure', 'azureml-core', - 'azure-storage', - 'azure-storage-blob']) - ) + # Create a run configuration environment + conda_deps_file = "diabetes_regression/training_dependencies.yml" + conda_deps = CondaDependencies(conda_deps_file) + run_config = RunConfiguration(conda_dependencies=conda_deps) run_config.environment.docker.enabled = True - run_config.environment.docker.base_image = "mcr.microsoft.com/mlops/python" train_step = PythonScriptStep( name="Train Model", From b05c3b7d625e69eed2f5090e97c698914dac1f43 Mon Sep 17 00:00:00 2001 From: Alexandre Gattiker Date: Fri, 31 Jan 2020 08:08:00 +0100 Subject: [PATCH 08/22] Update 
Dockerfile --- environment_setup/Dockerfile | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/environment_setup/Dockerfile b/environment_setup/Dockerfile index 5c7f62dc..4137967a 100644 --- a/environment_setup/Dockerfile +++ b/environment_setup/Dockerfile @@ -12,8 +12,8 @@ RUN conda env create -f /setup/ci_dependencies.yml ENV PATH /usr/local/envs/mlopspython_ci/bin:$PATH RUN /bin/bash -c "source activate mlopspython_ci" -# Install Azure CLI ML extension. -# This also serves as workaround for https://github.com/conda/conda/issues/8537 (conda env create doesn't fail -# if pip installation fails, for example due to different version specs in the various environment files). -# The `az` command is not available if pip has not installed azure-cli. -RUN az extension add -n azure-cli-ml +# Verify conda installation. +# This serves as workaround for https://github.com/conda/conda/issues/8537 (conda env create doesn't fail +# if pip installation fails, for example due to a wrong package version). +# The `az` command is not available if pip has not run (and installed azure-cli). +RUN az --version From e312ae64aab305ae44165dd4b81b7cf7536ad348 Mon Sep 17 00:00:00 2001 From: Alexandre Gattiker Date: Fri, 31 Jan 2020 18:37:17 +0100 Subject: [PATCH 09/22] . 
--- .../diabetes_regression-ci-build-train.yml | 31 +----- .../scoring/deployment_config_aci.yml | 5 - .../scoring/deployment_config_aks.yml | 16 --- diabetes_regression/scoring_dependencies.yml | 13 --- ml_service/pipelines/deploy_web_service.py | 100 ++++++++++++++++++ ...iabetes_regression_build_train_pipeline.py | 14 +-- ml_service/util/manage_environment.py | 52 +++++++++ 7 files changed, 164 insertions(+), 67 deletions(-) delete mode 100644 diabetes_regression/scoring/deployment_config_aci.yml delete mode 100644 diabetes_regression/scoring/deployment_config_aks.yml create mode 100644 ml_service/pipelines/deploy_web_service.py create mode 100644 ml_service/util/manage_environment.py diff --git a/.pipelines/diabetes_regression-ci-build-train.yml b/.pipelines/diabetes_regression-ci-build-train.yml index bcc1249c..f55f49bc 100644 --- a/.pipelines/diabetes_regression-ci-build-train.yml +++ b/.pipelines/diabetes_regression-ci-build-train.yml @@ -111,26 +111,15 @@ stages: timeoutInMinutes: 0 steps: - template: diabetes_regression-template-get-model-version.yml - - task: ms-air-aiagility.vss-services-azureml.azureml-model-deploy-task.AMLModelDeploy@0 - displayName: 'Azure ML Model Deploy' - inputs: - azureSubscription: $(WORKSPACE_SVC_CONNECTION) - modelSourceType: manualSpec - modelName: '$(MODEL_NAME)' - modelVersion: $(MODEL_VERSION) - inferencePath: '$(Build.SourcesDirectory)/$(SOURCES_DIR_TRAIN)/scoring/inference_config.yml' - deploymentTarget: ACI - deploymentName: $(ACI_DEPLOYMENT_NAME) - deployConfig: '$(Build.SourcesDirectory)/$(SOURCES_DIR_TRAIN)/scoring/deployment_config_aci.yml' - overwriteExistingDeployment: true - task: AzureCLI@1 - displayName: 'Smoke test' + displayName: Azure ML Model Deploy and smoke test inputs: azureSubscription: '$(WORKSPACE_SVC_CONNECTION)' scriptLocation: inlineScript inlineScript: | set -e # fail on error export SUBSCRIPTION_ID=$(az account show --query id -o tsv) + python -m ml_service.pipelines.deploy_web_service --type ACI 
--service "$(ACI_DEPLOYMENT_NAME)" python -m ml_service.util.smoke_test_scoring_service --type ACI --service "$(ACI_DEPLOYMENT_NAME)" - stage: 'Deploy_AKS' @@ -146,27 +135,15 @@ stages: timeoutInMinutes: 0 steps: - template: diabetes_regression-template-get-model-version.yml - - task: ms-air-aiagility.vss-services-azureml.azureml-model-deploy-task.AMLModelDeploy@0 - displayName: 'Azure ML Model Deploy' - inputs: - azureSubscription: $(WORKSPACE_SVC_CONNECTION) - modelSourceType: manualSpec - modelName: '$(MODEL_NAME)' - modelVersion: $(MODEL_VERSION) - inferencePath: '$(Build.SourcesDirectory)/$(SOURCES_DIR_TRAIN)/scoring/inference_config.yml' - deploymentTarget: AKS - aksCluster: $(AKS_COMPUTE_NAME) - deploymentName: $(AKS_DEPLOYMENT_NAME) - deployConfig: '$(Build.SourcesDirectory)/$(SOURCES_DIR_TRAIN)/scoring/deployment_config_aks.yml' - overwriteExistingDeployment: true - task: AzureCLI@1 - displayName: 'Smoke test' + displayName: Azure ML Model Deploy and smoke test inputs: azureSubscription: '$(WORKSPACE_SVC_CONNECTION)' scriptLocation: inlineScript inlineScript: | set -e # fail on error export SUBSCRIPTION_ID=$(az account show --query id -o tsv) + python -m ml_service.pipelines.deploy_web_service --type AKS --service "$(AKS_DEPLOYMENT_NAME)" --compute_target "$(AKS_COMPUTE_NAME)" python -m ml_service.util.smoke_test_scoring_service --type AKS --service "$(AKS_DEPLOYMENT_NAME)" - stage: 'Deploy_Webapp' diff --git a/diabetes_regression/scoring/deployment_config_aci.yml b/diabetes_regression/scoring/deployment_config_aci.yml deleted file mode 100644 index 939483b5..00000000 --- a/diabetes_regression/scoring/deployment_config_aci.yml +++ /dev/null @@ -1,5 +0,0 @@ ---- -containerResourceRequirements: - cpu: 1 - memoryInGB: 4 -computeType: ACI \ No newline at end of file diff --git a/diabetes_regression/scoring/deployment_config_aks.yml b/diabetes_regression/scoring/deployment_config_aks.yml deleted file mode 100644 index 1299dc9d..00000000 --- 
a/diabetes_regression/scoring/deployment_config_aks.yml +++ /dev/null @@ -1,16 +0,0 @@ -computeType: AKS -autoScaler: - autoscaleEnabled: True - minReplicas: 1 - maxReplicas: 3 - refreshPeriodInSeconds: 10 - targetUtilization: 70 -authEnabled: True -containerResourceRequirements: - cpu: 1 - memoryInGB: 4 -appInsightsEnabled: True -scoringTimeoutMs: 5000 -maxConcurrentRequestsPerContainer: 2 -maxQueueWaitMs: 5000 -sslEnabled: True diff --git a/diabetes_regression/scoring_dependencies.yml b/diabetes_regression/scoring_dependencies.yml index 26ce3622..bf3ca2ce 100644 --- a/diabetes_regression/scoring_dependencies.yml +++ b/diabetes_regression/scoring_dependencies.yml @@ -1,19 +1,6 @@ -# Conda environment specification. The dependencies defined in this file will -# be automatically provisioned for managed runs. These include runs against -# the localdocker, remotedocker, and cluster compute targets. - -# Note that this file is NOT used to automatically manage dependencies for the -# local compute target. To provision these dependencies locally, run: -# conda env update --file conda_dependencies.yml - # Details about the Conda environment file format: # https://conda.io/docs/using/envs.html#create-environment-file-by-hand -# For managing Spark packages and configuration, see spark_dependencies.yml. -# Version of this configuration file's structure and semantics in AzureML. -# This directive is stored in a comment to preserve the Conda file structure. 
-# [AzureMlVersion] = 2 - name: diabetes_scoring dependencies: diff --git a/ml_service/pipelines/deploy_web_service.py b/ml_service/pipelines/deploy_web_service.py new file mode 100644 index 00000000..dff38cf6 --- /dev/null +++ b/ml_service/pipelines/deploy_web_service.py @@ -0,0 +1,100 @@ +import argparse +import os +from azureml.core import Workspace +from azureml.core.webservice import AciWebservice, AksWebservice +from azureml.core.model import InferenceConfig, Model +from ml_service.util.env_variables import Env +from ml_service.util.manage_environment import get_environment + + +def main(): + parser = argparse.ArgumentParser("smoke_test_scoring_service.py") + + parser.add_argument( + "--type", + type=str, + choices=["AKS", "ACI"], + required=True, + help="type of service" + ) + parser.add_argument( + "--service", + type=str, + required=True, + help="Name of the service to deploy" + ) + parser.add_argument( + "--compute_target", + type=str, + required=True, + help="Name of the compute target. 
Only applicable if type = AKS" + ) + args = parser.parse_args() + + e = Env() + # Get Azure machine learning workspace + aml_workspace = Workspace.get( + name=e.workspace_name, + subscription_id=e.subscription_id, + resource_group=e.resource_group + ) + print("get_workspace:") + print(aml_workspace) + + # Create a reusable scoring environment + environment = get_environment( + aml_workspace, "diabetes_scoring", + "diabetes_regression/scoring_dependencies.yml") + + inference_config = InferenceConfig( + entry_script='score.py', + source_directory=os.path.join(e.sources_directory_train, "scoring"), + environment=environment, + ) + + service_description=f'Scoring model version {e.model_version}' + + if args.type == "AKS": + + deployment_config = AksWebservice.deploy_configuration( + compute_target_name=args.compute_target, + description=service_description, + autoscale_enabled=True, + autoscale_min_replicas=1, + autoscale_max_replicas=3, + autoscale_refresh_seconds=10, + autoscale_target_utilization=70, + auth_enabled=True, + cpu_cores=1, + memory_gb=4, + scoring_timeout_ms=5000, + replica_max_concurrent_requests=2, + max_request_wait_time=5000, + ) + + else: + + deployment_config = AciWebservice.deploy_configuration( + description=service_description, + cpu_cores=1, + memory_gb=4, + ) + + deployment_config.add_tags({"BuildId": e.build_id}) + + model = Model(aml_workspace, name=e.model_name, version=e.model_version) + + print(f'Deploying model {model} as service {args.service}') + service = Model.deploy( + workspace=aml_workspace, + name=args.service, + models=[model], + inference_config=inference_config, + deployment_config=deployment_config, + overwrite=True, + ) + service.wait_for_deployment(show_output=True) + + +if __name__ == '__main__': + main() diff --git a/ml_service/pipelines/diabetes_regression_build_train_pipeline.py b/ml_service/pipelines/diabetes_regression_build_train_pipeline.py index b127d9ee..5712aeae 100644 --- 
a/ml_service/pipelines/diabetes_regression_build_train_pipeline.py +++ b/ml_service/pipelines/diabetes_regression_build_train_pipeline.py @@ -2,10 +2,11 @@ from azureml.pipeline.steps import PythonScriptStep from azureml.pipeline.core import Pipeline from azureml.core import Workspace -from azureml.core.runconfig import RunConfiguration, CondaDependencies +from azureml.core.runconfig import RunConfiguration from azureml.core import Dataset, Datastore from ml_service.util.attach_compute import get_compute from ml_service.util.env_variables import Env +from ml_service.util.manage_environment import get_environment def main(): @@ -28,11 +29,12 @@ def main(): print("aml_compute:") print(aml_compute) - # Create a run configuration environment - conda_deps_file = "diabetes_regression/training_dependencies.yml" - conda_deps = CondaDependencies(conda_deps_file) - run_config = RunConfiguration(conda_dependencies=conda_deps) - run_config.environment.docker.enabled = True + # Create a reusable run configuration environment + run_config = RunConfiguration() + run_config.environment = get_environment( + aml_workspace, "diabetes_regression", + "diabetes_regression/training_dependencies.yml") + config_envvar = {} if (e.collection_uri is not None and e.teamproject_name is not None): builduri_base = e.collection_uri + e.teamproject_name diff --git a/ml_service/util/manage_environment.py b/ml_service/util/manage_environment.py new file mode 100644 index 00000000..a7b63b0d --- /dev/null +++ b/ml_service/util/manage_environment.py @@ -0,0 +1,52 @@ +from azureml.core import Workspace, Environment +import hashlib + + +def get_environment( + workspace: Workspace, + base_name: str, + environment_file: str, +): + """ + Get or create an Azure ML environment definition from a Conda YAML file. + For DevOps scenarios, it's important to have reproducible outcomes, + so that environments should be automatically updated whenever the Conda + YAML file is modified. 
It's also important that the pipeline can run + in multiple branches in parallel without interference. To enable that, + this function automatically creates an environment name from a base + name with a checksum appended. If that environment already exists, + it is retrieved, otherwise it is created from the Conda file. + """ + + with open(environment_file, 'rb') as file: + checksum = hashlib.sha1(file.read()).hexdigest() + + environment_name = base_name + "_" + checksum + try: + env = Environment.get( + workspace=workspace, name=environment_name) + print(f'Reusing environment {env}') + except Exception: + print(f'Creating environment {environment_name}') + env = create_environment( + workspace, environment_name, environment_file) + return env + + +def create_environment( + workspace: Workspace, + environment_name: str, + environment_file: str, +): + print("Creating a new environment " + environment_name) + + try: + aml_env = Environment.from_conda_specification( + name=environment_name, + file_path=environment_file) + aml_env.register(workspace) + return aml_env + except Exception as e: + print(e) + print('An error occurred while creating an environment.') + raise From 4b9323f90d630741e7918500ea616b44ad039bd9 Mon Sep 17 00:00:00 2001 From: Alexandre Gattiker Date: Fri, 31 Jan 2020 19:05:07 +0100 Subject: [PATCH 10/22] Update deploy_web_service.py --- ml_service/pipelines/deploy_web_service.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/ml_service/pipelines/deploy_web_service.py b/ml_service/pipelines/deploy_web_service.py index dff38cf6..99e41508 100644 --- a/ml_service/pipelines/deploy_web_service.py +++ b/ml_service/pipelines/deploy_web_service.py @@ -8,7 +8,7 @@ def main(): - parser = argparse.ArgumentParser("smoke_test_scoring_service.py") + parser = argparse.ArgumentParser("deploy_web_service.py") parser.add_argument( "--type", @@ -26,11 +26,13 @@ def main(): parser.add_argument( "--compute_target", type=str, - required=True, 
help="Name of the compute target. Only applicable if type = AKS" ) args = parser.parse_args() + if args.type == "AKS" and args.compute_target is None: + raise ValueError("--compute_target is required") + e = Env() # Get Azure machine learning workspace aml_workspace = Workspace.get( @@ -57,8 +59,9 @@ def main(): if args.type == "AKS": deployment_config = AksWebservice.deploy_configuration( - compute_target_name=args.compute_target, description=service_description, + tags = {"BuildId": e.build_id}, + compute_target_name=args.compute_target, autoscale_enabled=True, autoscale_min_replicas=1, autoscale_max_replicas=3, @@ -76,11 +79,11 @@ def main(): deployment_config = AciWebservice.deploy_configuration( description=service_description, + tags = {"BuildId": e.build_id}, cpu_cores=1, memory_gb=4, ) - deployment_config.add_tags({"BuildId": e.build_id}) model = Model(aml_workspace, name=e.model_name, version=e.model_version) From a12bcb20cb64019f2e6b80adb1a0e087615443eb Mon Sep 17 00:00:00 2001 From: Alexandre Gattiker Date: Fri, 31 Jan 2020 19:06:51 +0100 Subject: [PATCH 11/22] Update deploy_web_service.py --- ml_service/pipelines/deploy_web_service.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/ml_service/pipelines/deploy_web_service.py b/ml_service/pipelines/deploy_web_service.py index 99e41508..e7b912b4 100644 --- a/ml_service/pipelines/deploy_web_service.py +++ b/ml_service/pipelines/deploy_web_service.py @@ -54,13 +54,13 @@ def main(): environment=environment, ) - service_description=f'Scoring model version {e.model_version}' + service_description = f'Scoring model version {e.model_version}' if args.type == "AKS": deployment_config = AksWebservice.deploy_configuration( description=service_description, - tags = {"BuildId": e.build_id}, + tags={"BuildId": e.build_id}, compute_target_name=args.compute_target, autoscale_enabled=True, autoscale_min_replicas=1, @@ -79,12 +79,11 @@ def main(): deployment_config = 
AciWebservice.deploy_configuration( description=service_description, - tags = {"BuildId": e.build_id}, + tags={"BuildId": e.build_id}, cpu_cores=1, memory_gb=4, ) - model = Model(aml_workspace, name=e.model_name, version=e.model_version) print(f'Deploying model {model} as service {args.service}') From 5f81ea1292a468a1a3e8bd46fd424abf79607a56 Mon Sep 17 00:00:00 2001 From: Alexandre Gattiker Date: Fri, 31 Jan 2020 20:46:20 +0100 Subject: [PATCH 12/22] PR review fixes --- diabetes_regression/scoring/inference_config.yml | 9 --------- docs/code_description.md | 12 +++++++----- ...iabetes_regression_build_train_pipeline_with_r.py | 11 ++++++----- 3 files changed, 13 insertions(+), 19 deletions(-) delete mode 100644 diabetes_regression/scoring/inference_config.yml diff --git a/diabetes_regression/scoring/inference_config.yml b/diabetes_regression/scoring/inference_config.yml deleted file mode 100644 index 42947da8..00000000 --- a/diabetes_regression/scoring/inference_config.yml +++ /dev/null @@ -1,9 +0,0 @@ -entryScript: score.py -runtime: python -condaFile: ../scoring_dependencies.yml -extraDockerfileSteps: -schemaFile: -sourceDirectory: -enableGpu: False -baseImage: -baseImageRegistry: diff --git a/docs/code_description.md b/docs/code_description.md index 37bb005f..2522b39c 100644 --- a/docs/code_description.md +++ b/docs/code_description.md @@ -2,8 +2,6 @@ ### Environment Setup -- `environment_setup/ci_environment.yml` : Conda environment definition for the CI environment. - - `environment_setup/install_requirements.sh` : This script prepares a local conda environment i.e. install the Azure ML SDK and the packages specified in environment definitions. - `environment_setup/iac-*.yml, arm-templates` : Infrastructure as Code piplines to create and delete required resources along with corresponding arm-templates. @@ -25,14 +23,20 @@ - `ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r_on_dbricks.py` : builds and publishes an ML training pipeline. 
It uses R on Databricks Compute. - `ml_service/pipelines/run_train_pipeline.py` : invokes a published ML training pipeline (Python on ML Compute) via REST API. - `ml_service/pipelines/diabetes_regression_verify_train_pipeline.py` : determines whether the evaluate_model.py step of the training pipeline registered a new model. +- `ml_service/pipelines/deploy_web_service.py` : deploys the model to ACI or AKS. Also contains the deployment configuration for the environments (e.g. CPU, memory, number of replicas in AKS). - `ml_service/util` : contains common utility functions used to build and publish an ML training pipeline. +### Environment Definitions + +- `diabetes_regression/training_dependencies.yml` : Conda environment definition for the training environment (Docker image in which train.py is run). +- `diabetes_regression/scoring_dependencies.yml` : Conda environment definition for the scoring environment (Docker image in which score.py is run). +- `diabetes_regression/ci_dependencies.yml` : Conda environment definition for the CI environment. + ### Code - `diabetes_regression/training/train.py` : a training step of an ML training pipeline. - `diabetes_regression/evaluate/evaluate_model.py` : an evaluating step of an ML training pipeline which registers a new trained model if evaluation shows the new model is more performant than the previous one. - `diabetes_regression/evaluate/register_model.py` : (LEGACY) registers a new trained model if evaluation shows the new model is more performant than the previous one. -- `diabetes_regression/training/training_dependencies.yml` : contains a list of dependencies required by train.py to be installed in a deployable Docker Image - `diabetes_regression/training/R/r_train.r` : training a model with R basing on a sample dataset (weight_data.csv). 
- `diabetes_regression/training/R/train_with_r.py` : a python wrapper (ML Pipeline Step) invoking R training script on ML Compute - `diabetes_regression/training/R/train_with_r_on_databricks.py` : a python wrapper (ML Pipeline Step) invoking R training script on Databricks Compute @@ -40,5 +44,3 @@ ### Scoring - `diabetes_regression/scoring/score.py` : a scoring script which is about to be packed into a Docker Image along with a model while being deployed to QA/Prod environment. -- `diabetes_regression/scoring/scoring_dependencies.yml` : contains a list of dependencies required by score.py to be installed in a deployable Docker Image -- `diabetes_regression/scoring/inference_config.yml`, deployment_config_aci.yml, deployment_config_aks.yml : configuration files for the [AML Model Deploy](https://marketplace.visualstudio.com/items?itemName=ms-air-aiagility.private-vss-services-azureml&ssr=false#overview) pipeline task for ACI and AKS deployment targets. diff --git a/ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py b/ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py index 1c383d50..0009a6da 100644 --- a/ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py +++ b/ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py @@ -4,6 +4,7 @@ from azureml.core.runconfig import RunConfiguration, CondaDependencies from ml_service.util.attach_compute import get_compute from ml_service.util.env_variables import Env +from ml_service.util.manage_environment import get_environment def main(): @@ -26,11 +27,11 @@ def main(): print("aml_compute:") print(aml_compute) - # Create a run configuration environment - conda_deps_file = "diabetes_regression/training_dependencies.yml" - conda_deps = CondaDependencies(conda_deps_file) - run_config = RunConfiguration(conda_dependencies=conda_deps) - run_config.environment.docker.enabled = True + # Create a reusable run configuration environment + run_config = 
RunConfiguration() + run_config.environment = get_environment( + aml_workspace, "diabetes_regression", + "diabetes_regression/training_dependencies.yml") train_step = PythonScriptStep( name="Train Model", From 3b6974b1a33e0abec8f03ec7d089fb4c4dfa14b0 Mon Sep 17 00:00:00 2001 From: Alexandre Gattiker Date: Fri, 31 Jan 2020 21:07:10 +0100 Subject: [PATCH 13/22] PR review fixes --- docs/getting_started.md | 8 +++++++- ml_service/util/manage_environment.py | 1 - 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/docs/getting_started.md b/docs/getting_started.md index bbd20506..b8b0e42c 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -171,7 +171,13 @@ Great, you now have the build pipeline set up which automatically triggers every **Note:** The build pipeline also supports building and publishing ML pipelines using R to train a model. This is enabled -by changing the `build-train-script` pipeline variable to either `diabetes_regression_build_train_pipeline_with_r.py`, or `diabetes_regression_build_train_pipeline_with_r_on_dbricks.py`. For pipeline training a model with R on Databricks you'll need +by changing the `build-train-script` pipeline variable to either of: +* `diabetes_regression_build_train_pipeline_with_r.py` to train a model +with R on Azure ML Compute. You will also need to add the +`r-essentials` Conda packages into `diabetes_regression/scoring_dependencies.yml` +and `diabetes_regression/training_dependencies.yml`. +* `diabetes_regression_build_train_pipeline_with_r_on_dbricks.py` +to train a model with R on Databricks. You will need to manually create a Databricks cluster and attach it to the ML Workspace as a compute (Values DB_CLUSTER_ID and DATABRICKS_COMPUTE_NAME variables should be specified). 
diff --git a/ml_service/util/manage_environment.py b/ml_service/util/manage_environment.py index a7b63b0d..c04319d1 100644 --- a/ml_service/util/manage_environment.py +++ b/ml_service/util/manage_environment.py @@ -38,7 +38,6 @@ def create_environment( environment_name: str, environment_file: str, ): - print("Creating a new environment " + environment_name) try: aml_env = Environment.from_conda_specification( From de72bdec77674156a1a12a5ccf54ff38c5e24781 Mon Sep 17 00:00:00 2001 From: Alexandre Gattiker Date: Fri, 31 Jan 2020 21:11:04 +0100 Subject: [PATCH 14/22] PR review fixes --- docs/code_description.md | 10 ++++++---- docs/getting_started.md | 9 ++++++++- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/docs/code_description.md b/docs/code_description.md index 37bb005f..d69a6f30 100644 --- a/docs/code_description.md +++ b/docs/code_description.md @@ -2,8 +2,6 @@ ### Environment Setup -- `environment_setup/ci_environment.yml` : Conda environment definition for the CI environment. - - `environment_setup/install_requirements.sh` : This script prepares a local conda environment i.e. install the Azure ML SDK and the packages specified in environment definitions. - `environment_setup/iac-*.yml, arm-templates` : Infrastructure as Code piplines to create and delete required resources along with corresponding arm-templates. @@ -27,12 +25,17 @@ - `ml_service/pipelines/diabetes_regression_verify_train_pipeline.py` : determines whether the evaluate_model.py step of the training pipeline registered a new model. - `ml_service/util` : contains common utility functions used to build and publish an ML training pipeline. +### Environment Definitions + +- `diabetes_regression/training_dependencies.yml` : Conda environment definition for the training environment (Docker image in which train.py is run). +- `diabetes_regression/scoring_dependencies.yml` : Conda environment definition for the scoring environment (Docker image in which score.py is run). 
+- `diabetes_regression/ci_dependencies.yml` : Conda environment definition for the CI environment. + ### Code - `diabetes_regression/training/train.py` : a training step of an ML training pipeline. - `diabetes_regression/evaluate/evaluate_model.py` : an evaluating step of an ML training pipeline which registers a new trained model if evaluation shows the new model is more performant than the previous one. - `diabetes_regression/evaluate/register_model.py` : (LEGACY) registers a new trained model if evaluation shows the new model is more performant than the previous one. -- `diabetes_regression/training/training_dependencies.yml` : contains a list of dependencies required by train.py to be installed in a deployable Docker Image - `diabetes_regression/training/R/r_train.r` : training a model with R basing on a sample dataset (weight_data.csv). - `diabetes_regression/training/R/train_with_r.py` : a python wrapper (ML Pipeline Step) invoking R training script on ML Compute - `diabetes_regression/training/R/train_with_r_on_databricks.py` : a python wrapper (ML Pipeline Step) invoking R training script on Databricks Compute @@ -40,5 +43,4 @@ ### Scoring - `diabetes_regression/scoring/score.py` : a scoring script which is about to be packed into a Docker Image along with a model while being deployed to QA/Prod environment. -- `diabetes_regression/scoring/scoring_dependencies.yml` : contains a list of dependencies required by score.py to be installed in a deployable Docker Image - `diabetes_regression/scoring/inference_config.yml`, deployment_config_aci.yml, deployment_config_aks.yml : configuration files for the [AML Model Deploy](https://marketplace.visualstudio.com/items?itemName=ms-air-aiagility.private-vss-services-azureml&ssr=false#overview) pipeline task for ACI and AKS deployment targets. 
diff --git a/docs/getting_started.md b/docs/getting_started.md index bbd20506..8b3167e4 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -171,7 +171,13 @@ Great, you now have the build pipeline set up which automatically triggers every **Note:** The build pipeline also supports building and publishing ML pipelines using R to train a model. This is enabled -by changing the `build-train-script` pipeline variable to either `diabetes_regression_build_train_pipeline_with_r.py`, or `diabetes_regression_build_train_pipeline_with_r_on_dbricks.py`. For pipeline training a model with R on Databricks you'll need +by changing the `build-train-script` pipeline variable to either of: +* `diabetes_regression_build_train_pipeline_with_r.py` to train a model +with R on Azure ML Compute. You will also need to add the +`r-essentials` Conda packages into `diabetes_regression/scoring_dependencies.yml` +and `diabetes_regression/training_dependencies.yml`. +* `diabetes_regression_build_train_pipeline_with_r_on_dbricks.py` +to train a model with R on Databricks. You will need to manually create a Databricks cluster and attach it to the ML Workspace as a compute (Values DB_CLUSTER_ID and DATABRICKS_COMPUTE_NAME variables should be specified). @@ -243,6 +249,7 @@ Make sure your webapp has the credentials to pull the image from the Azure Conta * You should edit the pipeline definition to remove unused stages. For example, if you are deploying to ACI and AKS, you should delete the unused `Deploy_Webapp` stage. * The sample pipeline generates a random value for a model hyperparameter (ridge regression [*alpha*](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Ridge.html)) to generate 'interesting' charts when testing the sample. In a real application you should use fixed hyperparameter values. 
You can [tune hyperparameter values using Azure ML](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-tune-hyperparameters), and manage their values in Azure DevOps Variable Groups. * You may wish to enable [manual approvals](https://docs.microsoft.com/en-us/azure/devops/pipelines/process/approvals) before the deployment stages. +* You can install additional Conda or pip packages by modifying the YAML environment configurations under the `diabetes_regression` directory. Make sure to use fixed version numbers for all packages to ensure reproducibility, and use the same versions across environments. * You can explore aspects of model observability in the solution, such as: * **Logging**: navigate to the Application Insights instance linked to the Azure ML Portal, then to the Logs (Analytics) pane. The following sample query correlates HTTP requests with custom logs From 568bdeec21229b18923ba6f365b415b136febff0 Mon Sep 17 00:00:00 2001 From: Alexandre Gattiker Date: Fri, 31 Jan 2020 21:12:52 +0100 Subject: [PATCH 15/22] Update training_dependencies.yml --- diabetes_regression/training_dependencies.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/diabetes_regression/training_dependencies.yml b/diabetes_regression/training_dependencies.yml index 48f398fb..4d7a42a7 100644 --- a/diabetes_regression/training_dependencies.yml +++ b/diabetes_regression/training_dependencies.yml @@ -8,7 +8,6 @@ dependencies: - numpy=1.18.1 - pandas=1.0.0 - scikit-learn=0.22.1 -#- r #- r-essentials #- tensorflow #- keras From 1ddc19ea863eb287e1cd7aca2c0d1ec0076e0159 Mon Sep 17 00:00:00 2001 From: Alexandre Gattiker Date: Fri, 31 Jan 2020 23:28:53 +0100 Subject: [PATCH 16/22] Linting fixes --- ml_service/pipelines/deploy_web_service.py | 4 ++-- .../diabetes_regression_build_train_pipeline_with_r.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ml_service/pipelines/deploy_web_service.py b/ml_service/pipelines/deploy_web_service.py index 
e7b912b4..bb1fd1f2 100644 --- a/ml_service/pipelines/deploy_web_service.py +++ b/ml_service/pipelines/deploy_web_service.py @@ -45,8 +45,8 @@ def main(): # Create a reusable scoring environment environment = get_environment( - aml_workspace, "diabetes_scoring", - "diabetes_regression/scoring_dependencies.yml") + aml_workspace, "diabetes_scoring", + "diabetes_regression/scoring_dependencies.yml") inference_config = InferenceConfig( entry_script='score.py', diff --git a/ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py b/ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py index 0009a6da..cd93f5f6 100644 --- a/ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py +++ b/ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py @@ -1,7 +1,7 @@ from azureml.pipeline.steps import PythonScriptStep from azureml.pipeline.core import Pipeline from azureml.core import Workspace -from azureml.core.runconfig import RunConfiguration, CondaDependencies +from azureml.core.runconfig import RunConfiguration from ml_service.util.attach_compute import get_compute from ml_service.util.env_variables import Env from ml_service.util.manage_environment import get_environment From 2fa64b87d05cae80e5e0498c67d1d53b01a9d95b Mon Sep 17 00:00:00 2001 From: Alexandre Gattiker Date: Fri, 31 Jan 2020 23:32:38 +0100 Subject: [PATCH 17/22] Fixed merge --- diabetes_regression/scoring_dependencies.yml | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/diabetes_regression/scoring_dependencies.yml b/diabetes_regression/scoring_dependencies.yml index 26ce3622..bf3ca2ce 100644 --- a/diabetes_regression/scoring_dependencies.yml +++ b/diabetes_regression/scoring_dependencies.yml @@ -1,19 +1,6 @@ -# Conda environment specification. The dependencies defined in this file will -# be automatically provisioned for managed runs. These include runs against -# the localdocker, remotedocker, and cluster compute targets. 
- -# Note that this file is NOT used to automatically manage dependencies for the -# local compute target. To provision these dependencies locally, run: -# conda env update --file conda_dependencies.yml - # Details about the Conda environment file format: # https://conda.io/docs/using/envs.html#create-environment-file-by-hand -# For managing Spark packages and configuration, see spark_dependencies.yml. -# Version of this configuration file's structure and semantics in AzureML. -# This directive is stored in a comment to preserve the Conda file structure. -# [AzureMlVersion] = 2 - name: diabetes_scoring dependencies: From a0052dda74d6ecffa505343a882a503fa9aacbd8 Mon Sep 17 00:00:00 2001 From: Alexandre Gattiker Date: Fri, 31 Jan 2020 23:35:56 +0100 Subject: [PATCH 18/22] Update code_test.py --- tests/unit/code_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/code_test.py b/tests/unit/code_test.py index 1b133766..e8457f95 100644 --- a/tests/unit/code_test.py +++ b/tests/unit/code_test.py @@ -15,7 +15,7 @@ def test_train_model(): run = Mock(Run) reg = train_model(run, data, alpha=1.2) - run.log.assert_called_with("mse", 0.029843893480257067, + run.log.assert_called_with("mse", 0.029843893480256872, description='Mean squared error metric') preds = reg.predict([[1], [2]]) From a80e64a991e4bdaf62cd774ed40bfcc92ea06400 Mon Sep 17 00:00:00 2001 From: Alexandre Gattiker Date: Sun, 2 Feb 2020 10:24:19 +0100 Subject: [PATCH 19/22] Simplified environment management, restored deploy task --- .../diabetes_regression-ci-build-train.yml | 16 ++- diabetes_regression/azureml_environment.json | 39 +++++++ diabetes_regression/ci_dependencies.yml | 9 +- ...ependencies.yml => conda_dependencies.yml} | 14 ++- .../scoring/deployment_config_aci.yml | 4 + .../scoring/deployment_config_aks.yml | 16 +++ .../scoring/inference_config.yml | 9 ++ diabetes_regression/training_dependencies.yml | 17 --- docs/code_description.md | 6 +- 
ml_service/pipelines/deploy_web_service.py | 102 ------------------ ...iabetes_regression_build_train_pipeline.py | 17 ++- ..._regression_build_train_pipeline_with_r.py | 14 ++- ml_service/util/manage_environment.py | 51 --------- 13 files changed, 119 insertions(+), 195 deletions(-) create mode 100644 diabetes_regression/azureml_environment.json rename diabetes_regression/{scoring_dependencies.yml => conda_dependencies.yml} (71%) create mode 100644 diabetes_regression/scoring/deployment_config_aci.yml create mode 100644 diabetes_regression/scoring/deployment_config_aks.yml create mode 100644 diabetes_regression/scoring/inference_config.yml delete mode 100644 diabetes_regression/training_dependencies.yml delete mode 100644 ml_service/pipelines/deploy_web_service.py delete mode 100644 ml_service/util/manage_environment.py diff --git a/.pipelines/diabetes_regression-ci-build-train.yml b/.pipelines/diabetes_regression-ci-build-train.yml index 3a27b4f8..099d8401 100644 --- a/.pipelines/diabetes_regression-ci-build-train.yml +++ b/.pipelines/diabetes_regression-ci-build-train.yml @@ -127,15 +127,27 @@ stages: timeoutInMinutes: 0 steps: - template: diabetes_regression-template-get-model-version.yml + - task: ms-air-aiagility.vss-services-azureml.azureml-model-deploy-task.AMLModelDeploy@0 + displayName: 'Azure ML Model Deploy' + inputs: + azureSubscription: $(WORKSPACE_SVC_CONNECTION) + modelSourceType: manualSpec + modelName: '$(MODEL_NAME)' + modelVersion: $(MODEL_VERSION) + inferencePath: '$(Build.SourcesDirectory)/$(SOURCES_DIR_TRAIN)/scoring/inference_config.yml' + deploymentTarget: AKS + aksCluster: $(AKS_COMPUTE_NAME) + deploymentName: $(AKS_DEPLOYMENT_NAME) + deployConfig: '$(Build.SourcesDirectory)/$(SOURCES_DIR_TRAIN)/scoring/deployment_config_aks.yml' + overwriteExistingDeployment: true - task: AzureCLI@1 - displayName: Azure ML Model Deploy and smoke test + displayName: 'Smoke test' inputs: azureSubscription: '$(WORKSPACE_SVC_CONNECTION)' scriptLocation: 
inlineScript inlineScript: | set -e # fail on error export SUBSCRIPTION_ID=$(az account show --query id -o tsv) - python -m ml_service.pipelines.deploy_web_service --type AKS --service "$(AKS_DEPLOYMENT_NAME)" --compute_target "$(AKS_COMPUTE_NAME)" python -m ml_service.util.smoke_test_scoring_service --type AKS --service "$(AKS_DEPLOYMENT_NAME)" - stage: 'Deploy_Webapp' diff --git a/diabetes_regression/azureml_environment.json b/diabetes_regression/azureml_environment.json new file mode 100644 index 00000000..8a81614e --- /dev/null +++ b/diabetes_regression/azureml_environment.json @@ -0,0 +1,39 @@ +{ + "name": "diabetes_regression_sklearn", + "version": null, + "environmentVariables": { + "EXAMPLE_ENV_VAR": "EXAMPLE_VALUE" + }, + "python": { + "userManagedDependencies": false, + "interpreterPath": "python", + "condaDependenciesFile": null, + "baseCondaEnvironment": null + }, + "docker": { + "enabled": true, + "baseImage": "mcr.microsoft.com/azureml/base:intelmpi2018.3-ubuntu16.04", + "baseDockerfile": null, + "sharedVolumes": true, + "shmSize": "2g", + "arguments": [], + "baseImageRegistry": { + "address": null, + "username": null, + "password": null + } + }, + "spark": { + "repositories": [], + "packages": [], + "precachePackages": true + }, + "databricks": { + "mavenLibraries": [], + "pypiLibraries": [], + "rcranLibraries": [], + "jarLibraries": [], + "eggLibraries": [] + }, + "inferencingStackVersion": null +} diff --git a/diabetes_regression/ci_dependencies.yml b/diabetes_regression/ci_dependencies.yml index a61731c4..a5ee8db7 100644 --- a/diabetes_regression/ci_dependencies.yml +++ b/diabetes_regression/ci_dependencies.yml @@ -5,16 +5,19 @@ dependencies: # The python interpreter version. - python=3.7.5 -- r=3.6.0 -- r-essentials=3.6.0 + # dependencies with versions aligned with conda_dependencies.yml. - numpy=1.18.1 - pandas=1.0.0 - scikit-learn=0.22.1 + # dependencies for MLOps with R. 
+- r=3.6.0 +- r-essentials=3.6.0 + - pip=20.0.2 - pip: - # dependencies shared with other environment .yml files. + # dependencies with versions aligned with conda_dependencies.yml. - azureml-sdk==1.0.79 # Additional pip dependencies for the CI environment. diff --git a/diabetes_regression/scoring_dependencies.yml b/diabetes_regression/conda_dependencies.yml similarity index 71% rename from diabetes_regression/scoring_dependencies.yml rename to diabetes_regression/conda_dependencies.yml index bf3ca2ce..741f55c7 100644 --- a/diabetes_regression/scoring_dependencies.yml +++ b/diabetes_regression/conda_dependencies.yml @@ -1,7 +1,7 @@ # Details about the Conda environment file format: # https://conda.io/docs/using/envs.html#create-environment-file-by-hand -name: diabetes_scoring +name: diabetes_regression_sklearn dependencies: @@ -15,9 +15,19 @@ dependencies: - numpy=1.18.1 - pandas=1.0.0 - scikit-learn=0.22.1 +#- r-essentials +#- tensorflow +#- keras - pip=20.0.2 - pip: + + # Dependencies for training environment. + + - azureml-core==1.0.79 + + # Dependencies for scoring environment. 
+ # You must list azureml-defaults as a pip dependency - - azureml-defaults==1.0.85 + - azureml-defaults==1.0.79 - inference-schema[numpy-support]==1.0.1 diff --git a/diabetes_regression/scoring/deployment_config_aci.yml b/diabetes_regression/scoring/deployment_config_aci.yml new file mode 100644 index 00000000..d2e0ba12 --- /dev/null +++ b/diabetes_regression/scoring/deployment_config_aci.yml @@ -0,0 +1,4 @@ +computeType: ACI +containerResourceRequirements: + cpu: 1 + memoryInGB: 4 diff --git a/diabetes_regression/scoring/deployment_config_aks.yml b/diabetes_regression/scoring/deployment_config_aks.yml new file mode 100644 index 00000000..1299dc9d --- /dev/null +++ b/diabetes_regression/scoring/deployment_config_aks.yml @@ -0,0 +1,16 @@ +computeType: AKS +autoScaler: + autoscaleEnabled: True + minReplicas: 1 + maxReplicas: 3 + refreshPeriodInSeconds: 10 + targetUtilization: 70 +authEnabled: True +containerResourceRequirements: + cpu: 1 + memoryInGB: 4 +appInsightsEnabled: True +scoringTimeoutMs: 5000 +maxConcurrentRequestsPerContainer: 2 +maxQueueWaitMs: 5000 +sslEnabled: True diff --git a/diabetes_regression/scoring/inference_config.yml b/diabetes_regression/scoring/inference_config.yml new file mode 100644 index 00000000..52017bae --- /dev/null +++ b/diabetes_regression/scoring/inference_config.yml @@ -0,0 +1,9 @@ +entryScript: score.py +runtime: python +condaFile: ../conda_dependencies.yml +extraDockerfileSteps: +schemaFile: +sourceDirectory: +enableGpu: False +baseImage: +baseImageRegistry: diff --git a/diabetes_regression/training_dependencies.yml b/diabetes_regression/training_dependencies.yml deleted file mode 100644 index 4d7a42a7..00000000 --- a/diabetes_regression/training_dependencies.yml +++ /dev/null @@ -1,17 +0,0 @@ -name: diabetes_training - -dependencies: - - # The python interpreter version. 
-- python=3.7.5 - -- numpy=1.18.1 -- pandas=1.0.0 -- scikit-learn=0.22.1 -#- r-essentials -#- tensorflow -#- keras - -- pip=20.0.2 -- pip: - - azureml-core==1.0.79 diff --git a/docs/code_description.md b/docs/code_description.md index 2522b39c..d60df616 100644 --- a/docs/code_description.md +++ b/docs/code_description.md @@ -23,13 +23,12 @@ - `ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r_on_dbricks.py` : builds and publishes an ML training pipeline. It uses R on Databricks Compute. - `ml_service/pipelines/run_train_pipeline.py` : invokes a published ML training pipeline (Python on ML Compute) via REST API. - `ml_service/pipelines/diabetes_regression_verify_train_pipeline.py` : determines whether the evaluate_model.py step of the training pipeline registered a new model. -- `ml_service/pipelines/deploy_web_service.py` : deploys the model to ACI or AKS. Also contains the deployment configuration for the environments (e.g. CPU, memory, number of replicas in AKS). - `ml_service/util` : contains common utility functions used to build and publish an ML training pipeline. ### Environment Definitions -- `diabetes_regression/training_dependencies.yml` : Conda environment definition for the training environment (Docker image in which train.py is run). -- `diabetes_regression/scoring_dependencies.yml` : Conda environment definition for the scoring environment (Docker image in which score.py is run). +- `diabetes_regression/azureml_environment.json` : Azure ML environment definition for the training environment, including base Docker image and a reference to `conda_dependencies.yml` Conda environment file. +- `diabetes_regression/conda_dependencies.yml` : Conda environment definition for the environment used for both training and scoring (Docker image in which train.py and score.py are run). - `diabetes_regression/ci_dependencies.yml` : Conda environment definition for the CI environment. 
### Code @@ -44,3 +43,4 @@ ### Scoring - `diabetes_regression/scoring/score.py` : a scoring script which is about to be packed into a Docker Image along with a model while being deployed to QA/Prod environment. +- `diabetes_regression/scoring/inference_config.yml`, `deployment_config_aci.yml`, `deployment_config_aks.yml` : configuration files for the [AML Model Deploy](https://marketplace.visualstudio.com/items?itemName=ms-air-aiagility.private-vss-services-azureml&ssr=false#overview) pipeline task for ACI and AKS deployment targets. diff --git a/ml_service/pipelines/deploy_web_service.py b/ml_service/pipelines/deploy_web_service.py deleted file mode 100644 index bb1fd1f2..00000000 --- a/ml_service/pipelines/deploy_web_service.py +++ /dev/null @@ -1,102 +0,0 @@ -import argparse -import os -from azureml.core import Workspace -from azureml.core.webservice import AciWebservice, AksWebservice -from azureml.core.model import InferenceConfig, Model -from ml_service.util.env_variables import Env -from ml_service.util.manage_environment import get_environment - - -def main(): - parser = argparse.ArgumentParser("deploy_web_service.py") - - parser.add_argument( - "--type", - type=str, - choices=["AKS", "ACI"], - required=True, - help="type of service" - ) - parser.add_argument( - "--service", - type=str, - required=True, - help="Name of the service to deploy" - ) - parser.add_argument( - "--compute_target", - type=str, - help="Name of the compute target. 
Only applicable if type = AKS" - ) - args = parser.parse_args() - - if args.type == "AKS" and args.compute_target is None: - raise ValueError("--compute_target is required") - - e = Env() - # Get Azure machine learning workspace - aml_workspace = Workspace.get( - name=e.workspace_name, - subscription_id=e.subscription_id, - resource_group=e.resource_group - ) - print("get_workspace:") - print(aml_workspace) - - # Create a reusable scoring environment - environment = get_environment( - aml_workspace, "diabetes_scoring", - "diabetes_regression/scoring_dependencies.yml") - - inference_config = InferenceConfig( - entry_script='score.py', - source_directory=os.path.join(e.sources_directory_train, "scoring"), - environment=environment, - ) - - service_description = f'Scoring model version {e.model_version}' - - if args.type == "AKS": - - deployment_config = AksWebservice.deploy_configuration( - description=service_description, - tags={"BuildId": e.build_id}, - compute_target_name=args.compute_target, - autoscale_enabled=True, - autoscale_min_replicas=1, - autoscale_max_replicas=3, - autoscale_refresh_seconds=10, - autoscale_target_utilization=70, - auth_enabled=True, - cpu_cores=1, - memory_gb=4, - scoring_timeout_ms=5000, - replica_max_concurrent_requests=2, - max_request_wait_time=5000, - ) - - else: - - deployment_config = AciWebservice.deploy_configuration( - description=service_description, - tags={"BuildId": e.build_id}, - cpu_cores=1, - memory_gb=4, - ) - - model = Model(aml_workspace, name=e.model_name, version=e.model_version) - - print(f'Deploying model {model} as service {args.service}') - service = Model.deploy( - workspace=aml_workspace, - name=args.service, - models=[model], - inference_config=inference_config, - deployment_config=deployment_config, - overwrite=True, - ) - service.wait_for_deployment(show_output=True) - - -if __name__ == '__main__': - main() diff --git a/ml_service/pipelines/diabetes_regression_build_train_pipeline.py 
b/ml_service/pipelines/diabetes_regression_build_train_pipeline.py index 5712aeae..013f7e00 100644 --- a/ml_service/pipelines/diabetes_regression_build_train_pipeline.py +++ b/ml_service/pipelines/diabetes_regression_build_train_pipeline.py @@ -1,12 +1,11 @@ from azureml.pipeline.core.graph import PipelineParameter from azureml.pipeline.steps import PythonScriptStep from azureml.pipeline.core import Pipeline -from azureml.core import Workspace +from azureml.core import Workspace, Environment from azureml.core.runconfig import RunConfiguration from azureml.core import Dataset, Datastore from ml_service.util.attach_compute import get_compute from ml_service.util.env_variables import Env -from ml_service.util.manage_environment import get_environment def main(): @@ -30,17 +29,15 @@ def main(): print(aml_compute) # Create a reusable run configuration environment - run_config = RunConfiguration() - run_config.environment = get_environment( - aml_workspace, "diabetes_regression", - "diabetes_regression/training_dependencies.yml") - - config_envvar = {} + environment = Environment.load_from_directory(e.sources_directory_train) if (e.collection_uri is not None and e.teamproject_name is not None): builduri_base = e.collection_uri + e.teamproject_name builduri_base = builduri_base + "/_build/results?buildId=" - config_envvar["BUILDURI_BASE"] = builduri_base - run_config.environment.environment_variables = config_envvar + environment.environment_variables["BUILDURI_BASE"] = builduri_base + environment.register(aml_workspace) + + run_config = RunConfiguration() + run_config.environment = environment model_name_param = PipelineParameter( name="model_name", default_value=e.model_name) diff --git a/ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py b/ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py index cd93f5f6..5d8cd0ee 100644 --- a/ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py +++ 
b/ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py @@ -1,10 +1,9 @@ from azureml.pipeline.steps import PythonScriptStep from azureml.pipeline.core import Pipeline -from azureml.core import Workspace +from azureml.core import Workspace, Environment from azureml.core.runconfig import RunConfiguration from ml_service.util.attach_compute import get_compute from ml_service.util.env_variables import Env -from ml_service.util.manage_environment import get_environment def main(): @@ -28,10 +27,15 @@ def main(): print(aml_compute) # Create a reusable run configuration environment + environment = Environment.load_from_directory(e.sources_directory_train) + if (e.collection_uri is not None and e.teamproject_name is not None): + builduri_base = e.collection_uri + e.teamproject_name + builduri_base = builduri_base + "/_build/results?buildId=" + environment.environment_variables["BUILDURI_BASE"] = builduri_base + environment.register(aml_workspace) + run_config = RunConfiguration() - run_config.environment = get_environment( - aml_workspace, "diabetes_regression", - "diabetes_regression/training_dependencies.yml") + run_config.environment = environment train_step = PythonScriptStep( name="Train Model", diff --git a/ml_service/util/manage_environment.py b/ml_service/util/manage_environment.py deleted file mode 100644 index c04319d1..00000000 --- a/ml_service/util/manage_environment.py +++ /dev/null @@ -1,51 +0,0 @@ -from azureml.core import Workspace, Environment -import hashlib - - -def get_environment( - workspace: Workspace, - base_name: str, - environment_file: str, -): - """ - Get or create an Azure ML environment definition from a Conda YAML file. - For DevOps scenarios, it's important to have reproducible outcomes, - so that environments should be automatically updated whenever the Conda - YAML file is modified. It's also important that the pipeline can run - in multiple branches in parallel without interference. 
To enable that, - this function automatically creates an environment name from a base - name with a checksum appended. If that environment already exists, - it is retrieved, otherwise it is created from the Conda file. - """ - - with open(environment_file, 'rb') as file: - checksum = hashlib.sha1(file.read()).hexdigest() - - environment_name = base_name + "_" + checksum - try: - env = Environment.get( - workspace=workspace, name=environment_name) - print(f'Reusing environment {env}') - except Exception: - print(f'Creating environment {environment_name}') - env = create_environment( - workspace, environment_name, environment_file) - return env - - -def create_environment( - workspace: Workspace, - environment_name: str, - environment_file: str, -): - - try: - aml_env = Environment.from_conda_specification( - name=environment_name, - file_path=environment_file) - aml_env.register(workspace) - return aml_env - except Exception as e: - print(e) - print('An error occurred while creating an environment.') - raise From 2214ad03808690af3c91ad057ca06bcbac9a1759 Mon Sep 17 00:00:00 2001 From: Alexandre Gattiker Date: Sun, 2 Feb 2020 10:26:28 +0100 Subject: [PATCH 20/22] Simplified environment management, restored deploy task --- .pipelines/diabetes_regression-ci-build-train.yml | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/.pipelines/diabetes_regression-ci-build-train.yml b/.pipelines/diabetes_regression-ci-build-train.yml index 099d8401..b89eb30c 100644 --- a/.pipelines/diabetes_regression-ci-build-train.yml +++ b/.pipelines/diabetes_regression-ci-build-train.yml @@ -105,15 +105,26 @@ stages: timeoutInMinutes: 0 steps: - template: diabetes_regression-template-get-model-version.yml + - task: ms-air-aiagility.vss-services-azureml.azureml-model-deploy-task.AMLModelDeploy@0 + displayName: 'Azure ML Model Deploy' + inputs: + azureSubscription: $(WORKSPACE_SVC_CONNECTION) + modelSourceType: manualSpec + modelName: '$(MODEL_NAME)' + 
modelVersion: $(MODEL_VERSION) + inferencePath: '$(Build.SourcesDirectory)/$(SOURCES_DIR_TRAIN)/scoring/inference_config.yml' + deploymentTarget: ACI + deploymentName: $(ACI_DEPLOYMENT_NAME) + deployConfig: '$(Build.SourcesDirectory)/$(SOURCES_DIR_TRAIN)/scoring/deployment_config_aci.yml' + overwriteExistingDeployment: true - task: AzureCLI@1 - displayName: Azure ML Model Deploy and smoke test + displayName: 'Smoke test' inputs: azureSubscription: '$(WORKSPACE_SVC_CONNECTION)' scriptLocation: inlineScript inlineScript: | set -e # fail on error export SUBSCRIPTION_ID=$(az account show --query id -o tsv) - python -m ml_service.pipelines.deploy_web_service --type ACI --service "$(ACI_DEPLOYMENT_NAME)" python -m ml_service.util.smoke_test_scoring_service --type ACI --service "$(ACI_DEPLOYMENT_NAME)" - stage: 'Deploy_AKS' From d4a589aabd62ecf5a2cac803e77cf11f01bdcd49 Mon Sep 17 00:00:00 2001 From: Alexandre Gattiker Date: Mon, 3 Feb 2020 21:17:42 +0100 Subject: [PATCH 21/22] Fixed doc for new file path --- docs/getting_started.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/getting_started.md b/docs/getting_started.md index 1d75bc05..805db267 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -175,9 +175,9 @@ Great, you now have the build pipeline set up which automatically triggers every pipelines using R to train a model. This is enabled by changing the `build-train-script` pipeline variable to either of: * `diabetes_regression_build_train_pipeline_with_r.py` to train a model -with R on Azure ML Compute. You will also need to add the -`r-essentials` Conda packages into `diabetes_regression/scoring_dependencies.yml` -and `diabetes_regression/training_dependencies.yml`. +with R on Azure ML Compute. You will also need to uncomment (i.e. include) the +`r-essentials` Conda packages in the environment definition +`diabetes_regression/conda_dependencies.yml`. 
* `diabetes_regression_build_train_pipeline_with_r_on_dbricks.py` to train a model with R on Databricks. You will need to manually create a Databricks cluster and attach it to the ML Workspace as a From fe35f831ee0add4139f671dcba30e45e519e8e3b Mon Sep 17 00:00:00 2001 From: Alexandre Gattiker Date: Mon, 3 Feb 2020 21:25:09 +0100 Subject: [PATCH 22/22] Added comments --- .../pipelines/diabetes_regression_build_train_pipeline.py | 1 + .../diabetes_regression_build_train_pipeline_with_r.py | 3 +++ 2 files changed, 4 insertions(+) diff --git a/ml_service/pipelines/diabetes_regression_build_train_pipeline.py b/ml_service/pipelines/diabetes_regression_build_train_pipeline.py index 3921b490..b7d32f99 100644 --- a/ml_service/pipelines/diabetes_regression_build_train_pipeline.py +++ b/ml_service/pipelines/diabetes_regression_build_train_pipeline.py @@ -29,6 +29,7 @@ def main(): print(aml_compute) # Create a reusable run configuration environment + # Read definition from diabetes_regression/azureml_environment.json environment = Environment.load_from_directory(e.sources_directory_train) if (e.collection_uri is not None and e.teamproject_name is not None): builduri_base = e.collection_uri + e.teamproject_name diff --git a/ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py b/ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py index 5d8cd0ee..96ddf2cf 100644 --- a/ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py +++ b/ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py @@ -27,6 +27,9 @@ def main(): print(aml_compute) # Create a reusable run configuration environment + # Read definition from diabetes_regression/azureml_environment.json + # Make sure to include `r-essentials` + # in diabetes_regression/conda_dependencies.yml environment = Environment.load_from_directory(e.sources_directory_train) if (e.collection_uri is not None and e.teamproject_name is not None): builduri_base = e.collection_uri 
+ e.teamproject_name