microsoft · sudivate · Jan 31, 2020 · Nov 28, 2019 · Jan 19, 2020 · Jan 29, 2020
diff --git a/.gitignore b/.gitignore
@@ -93,6 +93,7 @@ ENV/
 env.bak/
 venv.bak/
 *.vscode
+condaenv.*
 
 # Spyder project settings
 .spyderproject

diff --git a/diabetes_regression/ci_dependencies.yml b/diabetes_regression/ci_dependencies.yml
@@ -0,0 +1,28 @@
+name: mlopspython_ci
+
+dependencies:
+
+  # The python interpreter version.
+- python=3.7.5
+
+- r=3.6.0
+- r-essentials=3.6.0
+- numpy=1.18.1
+- pandas=1.0.0
+- scikit-learn=0.22.1
+
+- pip=20.0.2
+- pip:
+
+  # dependencies shared with other environment .yml files.
+  - azureml-sdk==1.0.79
+
+  # Additional pip dependencies for the CI environment.
+  - pytest==5.3.1
+  - pytest-cov==2.8.1
+  - requests==2.22.0
+  - python-dotenv==0.10.3
+  - flake8==3.7.9
+  - flake8_formatter_junit_xml==0.0.6
+  - azure-cli==2.0.77
+  - tox==3.14.3
diff --git a/diabetes_regression/scoring/inference_config.yml b/diabetes_regression/scoring/inference_config.yml
@@ -1,9 +1,9 @@
 entryScript: score.py
 runtime: python
-condaFile: conda_dependencies.yml
+condaFile: ../scoring_dependencies.yml
 extraDockerfileSteps:
 schemaFile:
 sourceDirectory:
 enableGpu: False
 baseImage:
-baseImageRegistry:
+baseImageRegistry:
diff --git a/...regression/scoring/conda_dependencies.yml → diabetes_regression/scoring_dependencies.yml b/...regression/scoring/conda_dependencies.yml → diabetes_regression/scoring_dependencies.yml
@@ -14,24 +14,23 @@
 # This directive is stored in a comment to preserve the Conda file structure.
 # [AzureMlVersion] = 2
 
-name: project_environment
+name: diabetes_scoring
+
 dependencies:
+
   # The python interpreter version.
-  # Currently Azure ML Workbench only supports 3.5.2 and later.
 - python=3.7.5
+
   # Required by azureml-defaults, installed separately through Conda to
   # get a prebuilt version and not require build tools for the install.
 - psutil=5.6 #latest
 
+- numpy=1.18.1
+- pandas=1.0.0
+- scikit-learn=0.22.1
+
+- pip=20.0.2
 - pip:
-    # Required packages for AzureML execution, history, and data preparation.
-  - azureml-model-management-sdk==1.0.1b6.post1
-  - azureml-sdk==1.0.74
-  - scipy==1.3.1
-  - scikit-learn==0.22
-  - pandas==0.25.3
-  - numpy==1.17.3
-  - joblib==0.14.0
-  - gunicorn==19.9.0 
-  - flask==1.1.1
-  - inference-schema[numpy-support]
+  # You must list azureml-defaults as a pip dependency
+  - azureml-defaults==1.0.85
+  - inference-schema[numpy-support]==1.0.1
diff --git a/diabetes_regression/training_dependencies.yml b/diabetes_regression/training_dependencies.yml
@@ -0,0 +1,18 @@
+name: diabetes_training
+
+dependencies:
+
+  # The python interpreter version.
+- python=3.7.5
+
+- numpy=1.18.1
+- pandas=1.0.0
+- scikit-learn=0.22.1
+#- r
+#- r-essentials
+#- tensorflow
+#- keras
+
+- pip=20.0.2
+- pip:
+  - azureml-core==1.0.79
diff --git a/docs/code_description.md b/docs/code_description.md
@@ -2,9 +2,9 @@
 
 ### Environment Setup
 
-- `environment_setup/requirements.txt` : It consists of a list of python packages which are needed by the train.py to run successfully on host agent (locally).
+- `diabetes_regression/ci_dependencies.yml` : Conda environment definition for the CI environment.
 
-- `environment_setup/install_requirements.sh` : This script prepares the python environment i.e. install the Azure ML SDK and the packages specified in requirements.txt
+- `environment_setup/install_requirements.sh` : This script prepares a local conda environment, i.e. it installs the Azure ML SDK and the other packages specified in the `diabetes_regression/ci_dependencies.yml` environment definition.
 
 - `environment_setup/iac-*.yml, arm-templates` : Infrastructure as Code piplines to create and delete required resources along with corresponding arm-templates.
 
@@ -32,12 +32,13 @@
 - `diabetes_regression/training/train.py` : a training step of an ML training pipeline.
 - `diabetes_regression/evaluate/evaluate_model.py` : an evaluating step of an ML training pipeline which registers a new trained model if evaluation shows the new model is more performant than the previous one.
 - `diabetes_regression/evaluate/register_model.py` : (LEGACY) registers a new trained model if evaluation shows the new model is more performant than the previous one.
+- `diabetes_regression/training_dependencies.yml` : contains a list of dependencies required by train.py to be installed in the Docker-based environment used by the ML training pipeline run
 - `diabetes_regression/training/R/r_train.r` : training a model with R basing on a sample dataset (weight_data.csv).
 - `diabetes_regression/training/R/train_with_r.py` : a python wrapper (ML Pipeline Step) invoking R training script on ML Compute 
 - `diabetes_regression/training/R/train_with_r_on_databricks.py` : a python wrapper (ML Pipeline Step) invoking R training script on Databricks Compute
 - `diabetes_regression/training/R/weight_data.csv` : a sample dataset used by R script (r_train.r) to train a model
 
 ### Scoring
 - `diabetes_regression/scoring/score.py` : a scoring script which is about to be packed into a Docker Image along with a model while being deployed to QA/Prod environment.
-- `diabetes_regression/scoring/conda_dependencies.yml` : contains a list of dependencies required by score.py to be installed in a deployable Docker Image 
+- `diabetes_regression/scoring_dependencies.yml` : contains a list of dependencies required by score.py to be installed in a deployable Docker Image
 - `diabetes_regression/scoring/inference_config.yml`, deployment_config_aci.yml, deployment_config_aks.yml : configuration files for the [AML Model Deploy](https://marketplace.visualstudio.com/items?itemName=ms-air-aiagility.private-vss-services-azureml&ssr=false#overview) pipeline task for ACI and AKS deployment targets.
diff --git a/environment_setup/Dockerfile b/environment_setup/Dockerfile
@@ -4,11 +4,16 @@ LABEL org.label-schema.vendor = "Microsoft" \
       org.label-schema.url = "https://hub.docker.com/r/microsoft/mlopspython" \
       org.label-schema.vcs-url = "https://github.com/microsoft/MLOpsPython"
 
-       
+COPY diabetes_regression/ci_dependencies.yml /setup/
 
-COPY environment_setup/requirements.txt  /setup/
-
-RUN apt-get update && apt-get install gcc -y && pip install --upgrade -r /setup/requirements.txt && \ 
-    conda install -c r r-essentials
+RUN conda env create -f /setup/ci_dependencies.yml
 
-CMD ["python"]
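+# Activate the environment by putting its bin directory first on PATH (a `source activate` inside RUN only affects that single build step).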
+ENV PATH /usr/local/envs/mlopspython_ci/bin:$PATH
+RUN /bin/bash -c "source activate mlopspython_ci"
+
+# Verify conda installation.
+# This serves as workaround for https://github.com/conda/conda/issues/8537 (conda env create doesn't fail
+# if pip installation fails, for example due to a wrong package version).
+# The `az` command is not available if pip has not run (and installed azure-cli).
+RUN az --version
diff --git a/environment_setup/install_requirements.sh b/environment_setup/install_requirements.sh
@@ -24,6 +24,8 @@
 # ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE
 # POSSIBILITY OF SUCH DAMAGE.
 
+set -eux
 
-python --version
-pip install -r requirements.txt
+conda env create -f diabetes_regression/ci_dependencies.yml
+
+conda activate mlopspython_ci
diff --git a/environment_setup/requirements.txt b/environment_setup/requirements.txt
diff --git a/ml_service/pipelines/diabetes_regression_build_train_pipeline.py b/ml_service/pipelines/diabetes_regression_build_train_pipeline.py
@@ -28,14 +28,10 @@ def main():
         print("aml_compute:")
         print(aml_compute)
 
-    run_config = RunConfiguration(conda_dependencies=CondaDependencies.create(
-        conda_packages=['numpy', 'pandas',
-                        'scikit-learn', 'tensorflow', 'keras'],
-        pip_packages=['azure', 'azureml-core',
-                      'azure-storage',
-                      'azure-storage-blob',
-                      'azureml-dataprep'])
-    )
+    # Create a run configuration environment
+    conda_deps_file = "diabetes_regression/training_dependencies.yml"
+    conda_deps = CondaDependencies(conda_deps_file)
+    run_config = RunConfiguration(conda_dependencies=conda_deps)
     run_config.environment.docker.enabled = True
     config_envvar = {}
     if (e.collection_uri is not None and e.teamproject_name is not None):

diff --git a/ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py b/ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py
@@ -26,15 +26,11 @@ def main():
         print("aml_compute:")
         print(aml_compute)
 
-    run_config = RunConfiguration(conda_dependencies=CondaDependencies.create(
-        conda_packages=['numpy', 'pandas',
-                        'scikit-learn', 'tensorflow', 'keras'],
-        pip_packages=['azure', 'azureml-core',
-                      'azure-storage',
-                      'azure-storage-blob'])
-    )
+    # Create a run configuration environment
+    conda_deps_file = "diabetes_regression/training_dependencies.yml"
+    conda_deps = CondaDependencies(conda_deps_file)
+    run_config = RunConfiguration(conda_dependencies=conda_deps)
     run_config.environment.docker.enabled = True
-    run_config.environment.docker.base_image = "mcr.microsoft.com/mlops/python"
 
     train_step = PythonScriptStep(
         name="Train Model",

diff --git a/tests/unit/code_test.py b/tests/unit/code_test.py
@@ -15,7 +15,7 @@ def test_train_model():
     run = Mock(Run)
     reg = train_model(run, data, alpha=1.2)
 
-    run.log.assert_called_with("mse", 0.029843893480256872,
+    run.log.assert_called_with("mse", 0.029843893480257067,
                                description='Mean squared error metric')
 
     preds = reg.predict([[1], [2]])