From c8406234257e63fd5a28757f1e5ed3b1afc097b1 Mon Sep 17 00:00:00 2001 From: Alexandre Gattiker Date: Wed, 29 Jan 2020 22:55:03 +0100 Subject: [PATCH 01/11] . --- .../scoring/inference_config.yml | 4 +-- ...endencies.yml => scoring_dependencies.yml} | 23 ++++++++--------- .../training/training_dependencies.yml | 17 +++++++++++++ environment_setup/Dockerfile | 17 ++++++++----- environment_setup/ci_environment.yml | 25 +++++++++++++++++++ environment_setup/install_requirements.sh | 9 +++---- environment_setup/requirements.txt | 10 -------- ...iabetes_regression_build_train_pipeline.py | 12 +++------ 8 files changed, 74 insertions(+), 43 deletions(-) rename diabetes_regression/scoring/{conda_dependencies.yml => scoring_dependencies.yml} (74%) create mode 100644 diabetes_regression/training/training_dependencies.yml create mode 100644 environment_setup/ci_environment.yml delete mode 100644 environment_setup/requirements.txt diff --git a/diabetes_regression/scoring/inference_config.yml b/diabetes_regression/scoring/inference_config.yml index 3f65cf33..ca2c29ce 100644 --- a/diabetes_regression/scoring/inference_config.yml +++ b/diabetes_regression/scoring/inference_config.yml @@ -1,9 +1,9 @@ entryScript: score.py runtime: python -condaFile: conda_dependencies.yml +condaFile: scoring_dependencies.yml extraDockerfileSteps: schemaFile: sourceDirectory: enableGpu: False baseImage: -baseImageRegistry: \ No newline at end of file +baseImageRegistry: diff --git a/diabetes_regression/scoring/conda_dependencies.yml b/diabetes_regression/scoring/scoring_dependencies.yml similarity index 74% rename from diabetes_regression/scoring/conda_dependencies.yml rename to diabetes_regression/scoring/scoring_dependencies.yml index c97a2722..6f78c5a6 100644 --- a/diabetes_regression/scoring/conda_dependencies.yml +++ b/diabetes_regression/scoring/scoring_dependencies.yml @@ -14,24 +14,23 @@ # This directive is stored in a comment to preserve the Conda file structure. # [AzureMlVersion] = 2 -name: project_environment +name: diabetes_scoring + dependencies: + # The python interpreter version. - # Currently Azure ML Workbench only supports 3.5.2 and later. - python=3.7.5 + # Required by azureml-defaults, installed separately through Conda to # get a prebuilt version and not require build tools for the install. - psutil=5.6 #latest +- numpy +- pandas +- scikit-learn + +- pip - pip: - # Required packages for AzureML execution, history, and data preparation. - - azureml-model-management-sdk==1.0.1b6.post1 - - azureml-sdk==1.0.74 - - scipy==1.3.1 - - scikit-learn==0.22 - - pandas==0.25.3 - - numpy==1.17.3 - - joblib==0.14.0 - - gunicorn==19.9.0 - - flask==1.1.1 + # You must list azureml-defaults as a pip dependency + - azureml-defaults>=1.0.45 - inference-schema[numpy-support] diff --git a/diabetes_regression/training/training_dependencies.yml b/diabetes_regression/training/training_dependencies.yml new file mode 100644 index 00000000..03dcb89a --- /dev/null +++ b/diabetes_regression/training/training_dependencies.yml @@ -0,0 +1,17 @@ +name: diabetes_training + +dependencies: + + # The python interpreter version. +- python=3.7.5 + +- numpy +- pandas +- scikit-learn +- tensorflow +- keras + +- pip +- pip: + - azureml-core + - azure-storage-blob diff --git a/environment_setup/Dockerfile b/environment_setup/Dockerfile index 5e7b7581..6585e437 100644 --- a/environment_setup/Dockerfile +++ b/environment_setup/Dockerfile @@ -4,11 +4,16 @@ LABEL org.label-schema.vendor = "Microsoft" \ org.label-schema.url = "https://hub.docker.com/r/microsoft/mlopspython" \ org.label-schema.vcs-url = "https://github.com/microsoft/MLOpsPython" - +COPY diabetes_regression/training/training_dependencies.yml diabetes_regression/scoring/scoring_dependencies.yml environment_setup/ci_environment.yml /setup/ -COPY environment_setup/requirements.txt /setup/ - -RUN apt-get update && apt-get install gcc -y && pip install --upgrade -r /setup/requirements.txt && \ - conda install -c r r-essentials +RUN pip install conda-merge==0.1.5 && \ + cd /setup && conda-merge training_dependencies.yml scoring_dependencies.yml ci_environment.yml > conda_merged.yml && \ + echo "Generated conda environment definition:" && cat conda_merged.yml && \ + conda env create -f conda_merged.yml -CMD ["python"] \ No newline at end of file +# activate environment +ENV PATH /usr/local/envs/ci/bin:$PATH +RUN /bin/bash -c "source activate ci" + +# Install Azure CLI ML extension +RUN az extension add -n azure-cli-ml diff --git a/environment_setup/ci_environment.yml b/environment_setup/ci_environment.yml new file mode 100644 index 00000000..a7aae95f --- /dev/null +++ b/environment_setup/ci_environment.yml @@ -0,0 +1,25 @@ +name: ci + +dependencies: + +- r +- r-essentials +- numpy +- pandas +- scikit-learn + +- pip +- pip: + + # dependencies shared with diabetes_regression/scoring/scoring_dependencies.yml. + # Versions specification must match exactly, or the docker build will fail. + - azureml-sdk==1.0.74 + + # Additional pip dependencies for the CI environment. + - pytest>=5.3 + - requests>=2.22 + - numpy>=1.17 + - python-dotenv>=0.10.3 + - flake8>=3.7 + - flake8_formatter_junit_xml + - azure-cli>=2.0.76 diff --git a/environment_setup/install_requirements.sh b/environment_setup/install_requirements.sh index 1bdd081d..752d2959 100644 --- a/environment_setup/install_requirements.sh +++ b/environment_setup/install_requirements.sh @@ -24,8 +24,7 @@ # ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. - -python --version -pip install azure-cli==2.0.46 -pip install --upgrade azureml-sdk[cli] -pip install -r requirements.txt \ No newline at end of file +set -eux +pip install conda-merge==0.1.5 +conda-merge environment_setup/ci_environment.yml diabetes_regression/scoring/scoring_dependencies.yml diabetes_regression/training/training_dependencies.yml > /tmp/conda_merged.yml +conda env create -f /tmp/conda_merged.yml diff --git a/environment_setup/requirements.txt b/environment_setup/requirements.txt deleted file mode 100644 index f99e7f4b..00000000 --- a/environment_setup/requirements.txt +++ /dev/null @@ -1,10 +0,0 @@ -pytest>=5.3 -requests>=2.22 -numpy>=1.17 -pandas>=0.25 -scikit-learn>=0.21.3 -azureml-sdk>=1.0 -python-dotenv>=0.10.3 -flake8>=3.7 -flake8_formatter_junit_xml -azure-cli==2.0.76 diff --git a/ml_service/pipelines/diabetes_regression_build_train_pipeline.py b/ml_service/pipelines/diabetes_regression_build_train_pipeline.py index 0e963d96..c2440a7b 100644 --- a/ml_service/pipelines/diabetes_regression_build_train_pipeline.py +++ b/ml_service/pipelines/diabetes_regression_build_train_pipeline.py @@ -31,14 +31,10 @@ def main(): print("aml_compute:") print(aml_compute) - run_config = RunConfiguration(conda_dependencies=CondaDependencies.create( - conda_packages=['numpy', 'pandas', - 'scikit-learn', 'tensorflow', 'keras'], - pip_packages=['azure', 'azureml-core', - 'azure-storage', - 'azure-storage-blob', - 'azureml-dataprep']) - ) + # Create a reusable run configuration environment + conda_deps_file = "diabetes_regression/training/training_dependencies.yml" + conda_deps = CondaDependencies(conda_deps_file) + run_config = RunConfiguration(conda_dependencies=conda_deps) run_config.environment.docker.enabled = True config_envvar = {} if (e.collection_uri is not None and e.teamproject_name is not None): From c2b953ff16dbb3491e94172d4c71ee3a3327f7c4 Mon Sep 17 00:00:00 2001 From: Alexandre Gattiker Date: Thu, 30 Jan 2020 04:49:19 +0100 Subject: [PATCH 02/11] . --- environment_setup/Dockerfile | 4 ++-- environment_setup/ci_environment.yml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/environment_setup/Dockerfile b/environment_setup/Dockerfile index 6585e437..dfe1bdce 100644 --- a/environment_setup/Dockerfile +++ b/environment_setup/Dockerfile @@ -12,8 +12,8 @@ RUN pip install conda-merge==0.1.5 && \ conda env create -f conda_merged.yml # activate environment -ENV PATH /usr/local/envs/ci/bin:$PATH -RUN /bin/bash -c "source activate ci" +ENV PATH /usr/local/envs/mlopspython_ci/bin:$PATH +RUN /bin/bash -c "source activate mlopspython_ci" # Install Azure CLI ML extension RUN az extension add -n azure-cli-ml diff --git a/environment_setup/ci_environment.yml b/environment_setup/ci_environment.yml index a7aae95f..c8443aec 100644 --- a/environment_setup/ci_environment.yml +++ b/environment_setup/ci_environment.yml @@ -1,4 +1,4 @@ -name: ci +name: mlopspython_ci dependencies: From bceeba6de8151352f953214af38684c24b8a9d49 Mon Sep 17 00:00:00 2001 From: Alexandre Gattiker Date: Thu, 30 Jan 2020 07:04:25 +0100 Subject: [PATCH 03/11] Update code_test.py --- tests/unit/code_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/code_test.py b/tests/unit/code_test.py index c7b10182..0e09a2eb 100644 --- a/tests/unit/code_test.py +++ b/tests/unit/code_test.py @@ -19,7 +19,7 @@ def test_train_model(): run = Mock(Run) reg = train_model(run, data, alpha=1.2) - run.log.assert_called_with("mse", 0.029843893480256872, + run.log.assert_called_with("mse", 0.029843893480257067, description='Mean squared error metric') preds = reg.predict([[1], [2]]) From 897b7d40bc34a551d32213180dcfb728147817ee Mon Sep 17 00:00:00 2001 From: Alexandre Gattiker Date: Thu, 30 Jan 2020 11:02:09 +0100 Subject: [PATCH 04/11] . --- docs/code_description.md | 7 ++++--- environment_setup/ci_environment.yml | 5 +++-- environment_setup/install_requirements.sh | 2 +- 3 files changed, 8 insertions(+), 6 deletions(-) mode change 100644 => 100755 environment_setup/install_requirements.sh diff --git a/docs/code_description.md b/docs/code_description.md index 5a1af307..d8db0c0d 100644 --- a/docs/code_description.md +++ b/docs/code_description.md @@ -2,9 +2,9 @@ ### Environment Setup -- `environment_setup/requirements.txt` : It consists of a list of python packages which are needed by the train.py to run successfully on host agent (locally). +- `environment_setup/ci_environment.yml` : Conda environment definition for the CI environment. -- `environment_setup/install_requirements.sh` : This script prepares the python environment i.e. install the Azure ML SDK and the packages specified in requirements.txt +- `environment_setup/install_requirements.sh` : This script prepares a local conda environment i.e. install the Azure ML SDK and the packages specified in environment definitions. - `environment_setup/iac-*.yml, arm-templates` : Infrastructure as Code piplines to create and delete required resources along with corresponding arm-templates. @@ -32,6 +32,7 @@ - `diabetes_regression/training/train.py` : a training step of an ML training pipeline. - `diabetes_regression/evaluate/evaluate_model.py` : an evaluating step of an ML training pipeline which registers a new trained model if evaluation shows the new model is more performant than the previous one. - `diabetes_regression/evaluate/register_model.py` : (LEGACY) registers a new trained model if evaluation shows the new model is more performant than the previous one. +- `diabetes_regression/training/training_dependencies.yml` : contains a list of dependencies required by train.py to be installed in a deployable Docker Image - `diabetes_regression/training/R/r_train.r` : training a model with R basing on a sample dataset (weight_data.csv). - `diabetes_regression/training/R/train_with_r.py` : a python wrapper (ML Pipeline Step) invoking R training script on ML Compute - `diabetes_regression/training/R/train_with_r_on_databricks.py` : a python wrapper (ML Pipeline Step) invoking R training script on Databricks Compute @@ -39,5 +40,5 @@ ### Scoring - `diabetes_regression/scoring/score.py` : a scoring script which is about to be packed into a Docker Image along with a model while being deployed to QA/Prod environment. -- `diabetes_regression/scoring/conda_dependencies.yml` : contains a list of dependencies required by score.py to be installed in a deployable Docker Image +- `diabetes_regression/scoring/scoring_dependencies.yml` : contains a list of dependencies required by score.py to be installed in a deployable Docker Image - `diabetes_regression/scoring/inference_config.yml`, deployment_config_aci.yml, deployment_config_aks.yml : configuration files for the [AML Model Deploy](https://marketplace.visualstudio.com/items?itemName=ms-air-aiagility.private-vss-services-azureml&ssr=false#overview) pipeline task for ACI and AKS deployment targets. diff --git a/environment_setup/ci_environment.yml b/environment_setup/ci_environment.yml index c8443aec..d787ad94 100644 --- a/environment_setup/ci_environment.yml +++ b/environment_setup/ci_environment.yml @@ -11,8 +11,9 @@ dependencies: - pip - pip: - # dependencies shared with diabetes_regression/scoring/scoring_dependencies.yml. - # Versions specification must match exactly, or the docker build will fail. + # dependencies shared with diabetes_regression/scoring/scoring_dependencies.yml + # and/or diabetes_regression/training/training_dependencies.yml. + # If versions are specified, they must match exactly, or the docker build will fail. - azureml-sdk==1.0.74 # Additional pip dependencies for the CI environment. diff --git a/environment_setup/install_requirements.sh b/environment_setup/install_requirements.sh old mode 100644 new mode 100755 index 752d2959..5efc5261 --- a/environment_setup/install_requirements.sh +++ b/environment_setup/install_requirements.sh @@ -26,5 +26,5 @@ set -eux pip install conda-merge==0.1.5 -conda-merge environment_setup/ci_environment.yml diabetes_regression/scoring/scoring_dependencies.yml diabetes_regression/training/training_dependencies.yml > /tmp/conda_merged.yml +conda-merge diabetes_regression/training/training_dependencies.yml diabetes_regression/scoring/scoring_dependencies.yml environment_setup/ci_environment.yml > /tmp/conda_merged.yml conda env create -f /tmp/conda_merged.yml From e304fd2a2533e09b9fdc55049b8da020e91a9d67 Mon Sep 17 00:00:00 2001 From: Alexandre Gattiker Date: Thu, 30 Jan 2020 11:08:57 +0100 Subject: [PATCH 05/11] Update Dockerfile --- environment_setup/Dockerfile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/environment_setup/Dockerfile b/environment_setup/Dockerfile index dfe1bdce..41994ca4 100644 --- a/environment_setup/Dockerfile +++ b/environment_setup/Dockerfile @@ -15,5 +15,8 @@ RUN pip install conda-merge==0.1.5 && \ ENV PATH /usr/local/envs/mlopspython_ci/bin:$PATH RUN /bin/bash -c "source activate mlopspython_ci" -# Install Azure CLI ML extension +# Install Azure CLI ML extension. +# This also serves as workaround for https://github.com/conda/conda/issues/8537 (conda env create doesn't fail +# if pip installation fails, for example due to different version specs in the various environment files). +# The `az` command is not available if pip has not installed azure-cli. RUN az extension add -n azure-cli-ml From 41c2499270e88af9eecfe91a245e5a34aa906f75 Mon Sep 17 00:00:00 2001 From: Alexandre Gattiker Date: Fri, 31 Jan 2020 07:00:42 +0100 Subject: [PATCH 06/11] Do not use conda-merge * Move all 3 conda files to a single dir * Do not use conda-merge * Pin package versions --- diabetes_regression/ci_dependencies.yml | 28 +++++++++++++++++++ .../scoring/inference_config.yml | 2 +- .../{scoring => }/scoring_dependencies.yml | 12 ++++---- .../training/training_dependencies.yml | 17 ----------- diabetes_regression/training_dependencies.yml | 17 +++++++++++ environment_setup/Dockerfile | 7 ++--- environment_setup/ci_environment.yml | 26 ----------------- environment_setup/install_requirements.sh | 4 +-- ...iabetes_regression_build_train_pipeline.py | 2 +- 9 files changed, 56 insertions(+), 59 deletions(-) create mode 100644 diabetes_regression/ci_dependencies.yml rename diabetes_regression/{scoring => }/scoring_dependencies.yml (89%) delete mode 100644 diabetes_regression/training/training_dependencies.yml create mode 100644 diabetes_regression/training_dependencies.yml delete mode 100644 environment_setup/ci_environment.yml diff --git a/diabetes_regression/ci_dependencies.yml b/diabetes_regression/ci_dependencies.yml new file mode 100644 index 00000000..a61731c4 --- /dev/null +++ b/diabetes_regression/ci_dependencies.yml @@ -0,0 +1,28 @@ +name: mlopspython_ci + +dependencies: + + # The python interpreter version. +- python=3.7.5 + +- r=3.6.0 +- r-essentials=3.6.0 +- numpy=1.18.1 +- pandas=1.0.0 +- scikit-learn=0.22.1 + +- pip=20.0.2 +- pip: + + # dependencies shared with other environment .yml files. + - azureml-sdk==1.0.79 + + # Additional pip dependencies for the CI environment. + - pytest==5.3.1 + - pytest-cov==2.8.1 + - requests==2.22.0 + - python-dotenv==0.10.3 + - flake8==3.7.9 + - flake8_formatter_junit_xml==0.0.6 + - azure-cli==2.0.77 + - tox==3.14.3 diff --git a/diabetes_regression/scoring/inference_config.yml b/diabetes_regression/scoring/inference_config.yml index ca2c29ce..42947da8 100644 --- a/diabetes_regression/scoring/inference_config.yml +++ b/diabetes_regression/scoring/inference_config.yml @@ -1,6 +1,6 @@ entryScript: score.py runtime: python -condaFile: scoring_dependencies.yml +condaFile: ../scoring_dependencies.yml extraDockerfileSteps: schemaFile: sourceDirectory: diff --git a/diabetes_regression/scoring/scoring_dependencies.yml b/diabetes_regression/scoring_dependencies.yml similarity index 89% rename from diabetes_regression/scoring/scoring_dependencies.yml rename to diabetes_regression/scoring_dependencies.yml index 6f78c5a6..26ce3622 100644 --- a/diabetes_regression/scoring/scoring_dependencies.yml +++ b/diabetes_regression/scoring_dependencies.yml @@ -25,12 +25,12 @@ dependencies: # get a prebuilt version and not require build tools for the install. - psutil=5.6 #latest -- numpy -- pandas -- scikit-learn +- numpy=1.18.1 +- pandas=1.0.0 +- scikit-learn=0.22.1 -- pip +- pip=20.0.2 - pip: # You must list azureml-defaults as a pip dependency - - azureml-defaults>=1.0.45 - - inference-schema[numpy-support] + - azureml-defaults==1.0.85 + - inference-schema[numpy-support]==1.0.1 diff --git a/diabetes_regression/training/training_dependencies.yml b/diabetes_regression/training/training_dependencies.yml deleted file mode 100644 index 03dcb89a..00000000 --- a/diabetes_regression/training/training_dependencies.yml +++ /dev/null @@ -1,17 +0,0 @@ -name: diabetes_training - -dependencies: - - # The python interpreter version. -- python=3.7.5 - -- numpy -- pandas -- scikit-learn -- tensorflow -- keras - -- pip -- pip: - - azureml-core - - azure-storage-blob diff --git a/diabetes_regression/training_dependencies.yml b/diabetes_regression/training_dependencies.yml new file mode 100644 index 00000000..9a8bd6cf --- /dev/null +++ b/diabetes_regression/training_dependencies.yml @@ -0,0 +1,17 @@ +name: diabetes_training + +dependencies: + + # The python interpreter version. +- python=3.7.5 + +- numpy=1.18.1 +- pandas=1.0.0 +- scikit-learn=0.22.1 +#- tensorflow +#- keras + +- pip=20.0.2 +- pip: + - azureml-core==1.0.79 + - azure-storage-blob==12.1.0 diff --git a/environment_setup/Dockerfile b/environment_setup/Dockerfile index 41994ca4..5c7f62dc 100644 --- a/environment_setup/Dockerfile +++ b/environment_setup/Dockerfile @@ -4,12 +4,9 @@ LABEL org.label-schema.vendor = "Microsoft" \ org.label-schema.url = "https://hub.docker.com/r/microsoft/mlopspython" \ org.label-schema.vcs-url = "https://github.com/microsoft/MLOpsPython" -COPY diabetes_regression/training/training_dependencies.yml diabetes_regression/scoring/scoring_dependencies.yml environment_setup/ci_environment.yml /setup/ +COPY diabetes_regression/ci_dependencies.yml /setup/ -RUN pip install conda-merge==0.1.5 && \ - cd /setup && conda-merge training_dependencies.yml scoring_dependencies.yml ci_environment.yml > conda_merged.yml && \ - echo "Generated conda environment definition:" && cat conda_merged.yml && \ - conda env create -f conda_merged.yml +RUN conda env create -f /setup/ci_dependencies.yml # activate environment ENV PATH /usr/local/envs/mlopspython_ci/bin:$PATH diff --git a/environment_setup/ci_environment.yml b/environment_setup/ci_environment.yml deleted file mode 100644 index d787ad94..00000000 --- a/environment_setup/ci_environment.yml +++ /dev/null @@ -1,26 +0,0 @@ -name: mlopspython_ci - -dependencies: - -- r -- r-essentials -- numpy -- pandas -- scikit-learn - -- pip -- pip: - - # dependencies shared with diabetes_regression/scoring/scoring_dependencies.yml - # and/or diabetes_regression/training/training_dependencies.yml. - # If versions are specified, they must match exactly, or the docker build will fail. - - azureml-sdk==1.0.74 - - # Additional pip dependencies for the CI environment. - - pytest>=5.3 - - requests>=2.22 - - numpy>=1.17 - - python-dotenv>=0.10.3 - - flake8>=3.7 - - flake8_formatter_junit_xml - - azure-cli>=2.0.76 diff --git a/environment_setup/install_requirements.sh b/environment_setup/install_requirements.sh index 5efc5261..9ed59899 100755 --- a/environment_setup/install_requirements.sh +++ b/environment_setup/install_requirements.sh @@ -25,6 +25,4 @@ # POSSIBILITY OF SUCH DAMAGE. set -eux -pip install conda-merge==0.1.5 -conda-merge diabetes_regression/training/training_dependencies.yml diabetes_regression/scoring/scoring_dependencies.yml environment_setup/ci_environment.yml > /tmp/conda_merged.yml -conda env create -f /tmp/conda_merged.yml +conda env create -f diabetes_regression/ci_dependencies.yml diff --git a/ml_service/pipelines/diabetes_regression_build_train_pipeline.py b/ml_service/pipelines/diabetes_regression_build_train_pipeline.py index c2440a7b..b4879c45 100644 --- a/ml_service/pipelines/diabetes_regression_build_train_pipeline.py +++ b/ml_service/pipelines/diabetes_regression_build_train_pipeline.py @@ -32,7 +32,7 @@ def main(): print(aml_compute) # Create a reusable run configuration environment - conda_deps_file = "diabetes_regression/training/training_dependencies.yml" + conda_deps_file = "diabetes_regression/training_dependencies.yml" conda_deps = CondaDependencies(conda_deps_file) run_config = RunConfiguration(conda_dependencies=conda_deps) run_config.environment.docker.enabled = True From 921b65f9e1c7a19f8aa7271db7a2a9b910ebb7cf Mon Sep 17 00:00:00 2001 From: Alexandre Gattiker Date: Fri, 31 Jan 2020 07:35:19 +0100 Subject: [PATCH 07/11] PR review fixes --- .gitignore | 1 + diabetes_regression/training_dependencies.yml | 3 ++- environment_setup/install_requirements.sh | 3 +++ .../diabetes_regression_build_train_pipeline.py | 2 +- ...iabetes_regression_build_train_pipeline_with_r.py | 12 ++++-------- 5 files changed, 11 insertions(+), 10 deletions(-) diff --git a/.gitignore b/.gitignore index 7bac8768..02d3c963 100644 --- a/.gitignore +++ b/.gitignore @@ -91,6 +91,7 @@ ENV/ env.bak/ venv.bak/ *.vscode +condaenv.* # Spyder project settings .spyderproject diff --git a/diabetes_regression/training_dependencies.yml b/diabetes_regression/training_dependencies.yml index 9a8bd6cf..48f398fb 100644 --- a/diabetes_regression/training_dependencies.yml +++ b/diabetes_regression/training_dependencies.yml @@ -8,10 +8,11 @@ dependencies: - numpy=1.18.1 - pandas=1.0.0 - scikit-learn=0.22.1 +#- r +#- r-essentials #- tensorflow #- keras - pip=20.0.2 - pip: - azureml-core==1.0.79 - - azure-storage-blob==12.1.0 diff --git a/environment_setup/install_requirements.sh b/environment_setup/install_requirements.sh index 9ed59899..989e8b1e 100755 --- a/environment_setup/install_requirements.sh +++ b/environment_setup/install_requirements.sh @@ -25,4 +25,7 @@ # POSSIBILITY OF SUCH DAMAGE. set -eux + conda env create -f diabetes_regression/ci_dependencies.yml + +conda activate mlopspython_ci diff --git a/ml_service/pipelines/diabetes_regression_build_train_pipeline.py b/ml_service/pipelines/diabetes_regression_build_train_pipeline.py index aeb9a2b9..b127d9ee 100644 --- a/ml_service/pipelines/diabetes_regression_build_train_pipeline.py +++ b/ml_service/pipelines/diabetes_regression_build_train_pipeline.py @@ -28,7 +28,7 @@ def main(): print("aml_compute:") print(aml_compute) - # Create a reusable run configuration environment + # Create a run configuration environment conda_deps_file = "diabetes_regression/training_dependencies.yml" conda_deps = CondaDependencies(conda_deps_file) run_config = RunConfiguration(conda_dependencies=conda_deps) diff --git a/ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py b/ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py index cb47cdf5..4f71625f 100644 --- a/ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py +++ b/ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py @@ -26,15 +26,11 @@ def main(): print("aml_compute:") print(aml_compute) - run_config = RunConfiguration(conda_dependencies=CondaDependencies.create( - conda_packages=['numpy', 'pandas', - 'scikit-learn', 'tensorflow', 'keras'], - pip_packages=['azure', 'azureml-core', - 'azure-storage', - 'azure-storage-blob']) - ) + # Create a run configuration environment + conda_deps_file = "diabetes_regression/training_dependencies.yml" + conda_deps = CondaDependencies(conda_deps_file) + run_config = RunConfiguration(conda_dependencies=conda_deps) run_config.environment.docker.enabled = True - run_config.environment.docker.base_image = "mcr.microsoft.com/mlops/python" train_step = PythonScriptStep( name="Train Model", From b05c3b7d625e69eed2f5090e97c698914dac1f43 Mon Sep 17 00:00:00 2001 From: Alexandre Gattiker Date: Fri, 31 Jan 2020 08:08:00 +0100 Subject: [PATCH 08/11] Update Dockerfile --- environment_setup/Dockerfile | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/environment_setup/Dockerfile b/environment_setup/Dockerfile index 5c7f62dc..4137967a 100644 --- a/environment_setup/Dockerfile +++ b/environment_setup/Dockerfile @@ -12,8 +12,8 @@ RUN conda env create -f /setup/ci_dependencies.yml ENV PATH /usr/local/envs/mlopspython_ci/bin:$PATH RUN /bin/bash -c "source activate mlopspython_ci" -# Install Azure CLI ML extension. -# This also serves as workaround for https://github.com/conda/conda/issues/8537 (conda env create doesn't fail -# if pip installation fails, for example due to different version specs in the various environment files). -# The `az` command is not available if pip has not installed azure-cli. -RUN az extension add -n azure-cli-ml +# Verify conda installation. +# This serves as workaround for https://github.com/conda/conda/issues/8537 (conda env create doesn't fail +# if pip installation fails, for example due to a wrong package version). +# The `az` command is not available if pip has not run (and installed azure-cli). +RUN az --version From de72bdec77674156a1a12a5ccf54ff38c5e24781 Mon Sep 17 00:00:00 2001 From: Alexandre Gattiker Date: Fri, 31 Jan 2020 21:11:04 +0100 Subject: [PATCH 09/11] PR review fixes --- docs/code_description.md | 10 ++++++---- docs/getting_started.md | 9 ++++++++- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/docs/code_description.md b/docs/code_description.md index 37bb005f..d69a6f30 100644 --- a/docs/code_description.md +++ b/docs/code_description.md @@ -2,8 +2,6 @@ ### Environment Setup -- `environment_setup/ci_environment.yml` : Conda environment definition for the CI environment. - - `environment_setup/install_requirements.sh` : This script prepares a local conda environment i.e. install the Azure ML SDK and the packages specified in environment definitions. - `environment_setup/iac-*.yml, arm-templates` : Infrastructure as Code piplines to create and delete required resources along with corresponding arm-templates. @@ -27,12 +25,17 @@ - `ml_service/pipelines/diabetes_regression_verify_train_pipeline.py` : determines whether the evaluate_model.py step of the training pipeline registered a new model. - `ml_service/util` : contains common utility functions used to build and publish an ML training pipeline. +### Environment Definitions + +- `diabetes_regression/training_dependencies.yml` : Conda environment definition for the training environment (Docker image in which train.py is run). +- `diabetes_regression/scoring_dependencies.yml` : Conda environment definition for the scoring environment (Docker image in which score.py is run). +- `diabetes_regression/ci_dependencies.yml` : Conda environment definition for the CI environment. + ### Code - `diabetes_regression/training/train.py` : a training step of an ML training pipeline. - `diabetes_regression/evaluate/evaluate_model.py` : an evaluating step of an ML training pipeline which registers a new trained model if evaluation shows the new model is more performant than the previous one. - `diabetes_regression/evaluate/register_model.py` : (LEGACY) registers a new trained model if evaluation shows the new model is more performant than the previous one. -- `diabetes_regression/training/training_dependencies.yml` : contains a list of dependencies required by train.py to be installed in a deployable Docker Image - `diabetes_regression/training/R/r_train.r` : training a model with R basing on a sample dataset (weight_data.csv). - `diabetes_regression/training/R/train_with_r.py` : a python wrapper (ML Pipeline Step) invoking R training script on ML Compute - `diabetes_regression/training/R/train_with_r_on_databricks.py` : a python wrapper (ML Pipeline Step) invoking R training script on Databricks Compute @@ -40,5 +43,4 @@ ### Scoring - `diabetes_regression/scoring/score.py` : a scoring script which is about to be packed into a Docker Image along with a model while being deployed to QA/Prod environment. -- `diabetes_regression/scoring/scoring_dependencies.yml` : contains a list of dependencies required by score.py to be installed in a deployable Docker Image - `diabetes_regression/scoring/inference_config.yml`, deployment_config_aci.yml, deployment_config_aks.yml : configuration files for the [AML Model Deploy](https://marketplace.visualstudio.com/items?itemName=ms-air-aiagility.private-vss-services-azureml&ssr=false#overview) pipeline task for ACI and AKS deployment targets. diff --git a/docs/getting_started.md b/docs/getting_started.md index bbd20506..8b3167e4 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -171,7 +171,13 @@ Great, you now have the build pipeline set up which automatically triggers every **Note:** The build pipeline also supports building and publishing ML pipelines using R to train a model. This is enabled -by changing the `build-train-script` pipeline variable to either `diabetes_regression_build_train_pipeline_with_r.py`, or `diabetes_regression_build_train_pipeline_with_r_on_dbricks.py`. For pipeline training a model with R on Databricks you'll need +by changing the `build-train-script` pipeline variable to either of: +* `diabetes_regression_build_train_pipeline_with_r.py` to train a model +with R on Azure ML Compute. You will also need to add the +`r-essentials` Conda packages into `diabetes_regression/scoring_dependencies.yml` +and `diabetes_regression/training_dependencies.yml`. +* `diabetes_regression_build_train_pipeline_with_r_on_dbricks.py` +to train a model with R on Databricks. You will need to manually create a Databricks cluster and attach it to the ML Workspace as a compute (Values DB_CLUSTER_ID and DATABRICKS_COMPUTE_NAME variables should be specified). @@ -243,6 +249,7 @@ Make sure your webapp has the credentials to pull the image from the Azure Conta * You should edit the pipeline definition to remove unused stages. For example, if you are deploying to ACI and AKS, you should delete the unused `Deploy_Webapp` stage. * The sample pipeline generates a random value for a model hyperparameter (ridge regression [*alpha*](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Ridge.html)) to generate 'interesting' charts when testing the sample. In a real application you should use fixed hyperparameter values. You can [tune hyperparameter values using Azure ML](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-tune-hyperparameters), and manage their values in Azure DevOps Variable Groups. * You may wish to enable [manual approvals](https://docs.microsoft.com/en-us/azure/devops/pipelines/process/approvals) before the deployment stages. +* You can install additional Conda or pip packages by modifying the YAML environment configurations under the `diabetes_regression` directory. Make sure to use fixed version numbers for all packages to ensure reproducibility, and use the same versions across environments. * You can explore aspects of model observability in the solution, such as: * **Logging**: navigate to the Application Insights instance linked to the Azure ML Portal, then to the Logs (Analytics) pane. The following sample query correlates HTTP requests with custom logs From 568bdeec21229b18923ba6f365b415b136febff0 Mon Sep 17 00:00:00 2001 From: Alexandre Gattiker Date: Fri, 31 Jan 2020 21:12:52 +0100 Subject: [PATCH 10/11] Update training_dependencies.yml --- diabetes_regression/training_dependencies.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/diabetes_regression/training_dependencies.yml b/diabetes_regression/training_dependencies.yml index 48f398fb..4d7a42a7 100644 --- a/diabetes_regression/training_dependencies.yml +++ b/diabetes_regression/training_dependencies.yml @@ -8,7 +8,6 @@ dependencies: - numpy=1.18.1 - pandas=1.0.0 - scikit-learn=0.22.1 -#- r #- r-essentials #- tensorflow #- keras From f590807fdd875b4faa294294b49e483fd0bdf8b1 Mon Sep 17 00:00:00 2001 From: Alexandre Gattiker Date: Fri, 31 Jan 2020 22:43:02 +0100 Subject: [PATCH 11/11] Update code_test.py --- tests/unit/code_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/code_test.py b/tests/unit/code_test.py index 1b133766..e8457f95 100644 --- a/tests/unit/code_test.py +++ b/tests/unit/code_test.py @@ -15,7 +15,7 @@ def test_train_model(): run = Mock(Run) reg = train_model(run, data, alpha=1.2) - run.log.assert_called_with("mse", 0.029843893480257067, + run.log.assert_called_with("mse", 0.029843893480256872, description='Mean squared error metric') preds = reg.predict([[1], [2]])