microsoft · sbaidachni · Jan 25, 2020 · Jan 24, 2020 · Jan 24, 2020 · Jan 24, 2020
diff --git a/.env.example b/.env.example
@@ -30,7 +30,7 @@ TRAINING_PIPELINE_NAME = 'Training Pipeline'
 MODEL_PATH = ''
 EVALUATE_SCRIPT_PATH = 'evaluate/evaluate_model.py'
 REGISTER_SCRIPT_PATH = 'register/register_model.py'
-SOURCES_DIR_TRAIN = 'code'
+SOURCES_DIR_TRAIN = 'diabetes_regression'
 DATASET_NAME = 'diabetes_ds'
 DATASTORE_NAME = 'datablobstore'
 DATAFILE_NAME = 'diabetes.csv'

diff --git a/.pipelines/azdo-pr-build-train.yml b/.pipelines/azdo-pr-build-train.yml
@@ -11,7 +11,7 @@ container: mcr.microsoft.com/mlops/python:latest
 
 
 variables:
-- template: azdo-variables.yml
+- template: diabetes_regression-variables.yml
 - group: devopsforai-aml-vg
 
 

diff --git a/.pipelines/azdo-ci-build-train.yml → ...es/diabetes_regression-ci-build-train.yml b/.pipelines/azdo-ci-build-train.yml → ...es/diabetes_regression-ci-build-train.yml
@@ -4,14 +4,14 @@ trigger:
     include:
     - master
   paths:
-    exclude:
-    - docs/
-    - environment_setup/
-    - ml_service/util/create_scoring_image.*
-    - ml_service/util/smoke_test_scoring_service.py
+    include:
+    - diabetes_regression/
+    - ml_service/pipelines/diabetes_regression_build_train_pipeline.py
+    - ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py
+    - ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r_on_dbricks.py
 
 variables:
-- template: azdo-variables.yml
+- template: diabetes_regression-variables.yml
 - group: devopsforai-aml-vg
 
 

diff --git a/.pipelines/azdo-ci-image.yml → .pipelines/diabetes_regression-ci-image.yml b/.pipelines/azdo-ci-image.yml → .pipelines/diabetes_regression-ci-image.yml
@@ -7,10 +7,10 @@ trigger:
     include:
     - ml_service/util/create_scoring_image.py
     - ml_service/util/Dockerfile
-    - code/scoring/
+    - diabetes_regression/scoring/
     exclude:
-    - code/scoring/deployment_config_aci.yml
-    - code/scoring/deployment_config_aks.yml    
+    - diabetes_regression/scoring/deployment_config_aci.yml
+    - diabetes_regression/scoring/deployment_config_aks.yml    
 
 pool: 
   vmImage: 'ubuntu-latest'

diff --git a/.pipelines/azdo-variables.yml → .pipelines/diabetes_regression-variables.yml b/.pipelines/azdo-variables.yml → .pipelines/diabetes_regression-variables.yml
@@ -15,7 +15,7 @@ variables:
   value: lowpriority
   # Training Config
 - name: BUILD_TRAIN_SCRIPT
-  value: build_train_pipeline.py
+  value: diabetes_regression_build_train_pipeline.py
 - name: TRAIN_SCRIPT_PATH
   value: training/train.py
 - name: MODEL_NAME
@@ -24,17 +24,17 @@ variables:
   value: '1'
   # AML Pipeline Config 
 - name: TRAINING_PIPELINE_NAME
-  value: 'Training-Pipeline'
+  value: 'diabetes-Training-Pipeline'
 - name: MODEL_PATH
   value: ''
 - name: EVALUATE_SCRIPT_PATH
   value: evaluate/evaluate_model.py
 - name: REGISTER_SCRIPT_PATH
   value: register/register_model.py
 - name: SOURCES_DIR_TRAIN
-  value: code
+  value: diabetes_regression
 - name: IMAGE_NAME
-  value: 'mltrained'
+  value: 'diabetestrained'
   # Optional. Used by a training pipeline with R on Databricks
 - name: DB_CLUSTER_ID
   value: ''

diff --git a/code/evaluate/evaluate_model.py → ...tes_regression/evaluate/evaluate_model.py b/code/evaluate/evaluate_model.py → ...tes_regression/evaluate/evaluate_model.py
@@ -36,7 +36,7 @@
     load_dotenv()
     sources_dir = os.environ.get("SOURCES_DIR_TRAIN")
     if (sources_dir is None):
-        sources_dir = 'code'
+        sources_dir = 'diabetes_regression'
     path_to_util = os.path.join(".", sources_dir, "util")
     sys.path.append(os.path.abspath(path_to_util))  # NOQA: E402
     from model_helper import get_model_by_tag

diff --git a/code/register/register_model.py → ...tes_regression/register/register_model.py b/code/register/register_model.py → ...tes_regression/register/register_model.py
diff --git a/code/scoring/conda_dependencies.yml → ...regression/scoring/conda_dependencies.yml b/code/scoring/conda_dependencies.yml → ...regression/scoring/conda_dependencies.yml
diff --git a/code/scoring/deployment_config_aci.yml → ...ression/scoring/deployment_config_aci.yml b/code/scoring/deployment_config_aci.yml → ...ression/scoring/deployment_config_aci.yml
diff --git a/code/scoring/deployment_config_aks.yml → ...ression/scoring/deployment_config_aks.yml b/code/scoring/deployment_config_aks.yml → ...ression/scoring/deployment_config_aks.yml
diff --git a/code/scoring/inference_config.yml → ...s_regression/scoring/inference_config.yml b/code/scoring/inference_config.yml → ...s_regression/scoring/inference_config.yml
diff --git a/code/scoring/score.py → diabetes_regression/scoring/score.py b/code/scoring/score.py → diabetes_regression/scoring/score.py
diff --git a/code/scoring/scoreA.py → diabetes_regression/scoring/scoreA.py b/code/scoring/scoreA.py → diabetes_regression/scoring/scoreA.py
diff --git a/code/scoring/scoreB.py → diabetes_regression/scoring/scoreB.py b/code/scoring/scoreB.py → diabetes_regression/scoring/scoreB.py
diff --git a/code/training/R/r_train.r → diabetes_regression/training/R/r_train.r b/code/training/R/r_train.r → diabetes_regression/training/R/r_train.r
diff --git a/code/training/R/train_with_r.py → ...tes_regression/training/R/train_with_r.py b/code/training/R/train_with_r.py → ...tes_regression/training/R/train_with_r.py
diff --git a/.../training/R/train_with_r_on_databricks.py → .../training/R/train_with_r_on_databricks.py b/.../training/R/train_with_r_on_databricks.py → .../training/R/train_with_r_on_databricks.py
diff --git a/code/training/R/weight_data.csv → ...tes_regression/training/R/weight_data.csv b/code/training/R/weight_data.csv → ...tes_regression/training/R/weight_data.csv
diff --git a/code/training/train.py → diabetes_regression/training/train.py b/code/training/train.py → diabetes_regression/training/train.py
diff --git a/code/util/model_helper.py → diabetes_regression/util/model_helper.py b/code/util/model_helper.py → diabetes_regression/util/model_helper.py
diff --git a/docs/code_description.md b/docs/code_description.md
@@ -15,31 +15,29 @@
 ### Pipelines
 
 - `.pipelines/azdo-base-pipeline.yml` : a pipeline template used by ci-build-train pipeline and pr-build-train pipelines. It contains steps performing linting, data and unit testing.  
-- `.pipelines/azdo-ci-build-train.yml` : a pipeline triggered when the code is merged into **master**. It performs linting, data integrity testing, unit testing, building and publishing an ML pipeline.
+- `.pipelines/diabetes_regression-ci-build-train.yml` : a pipeline triggered when the code is merged into **master**. It performs linting, data integrity testing, unit testing, building and publishing an ML pipeline.
 - `.pipelines/azdo-pr-build-train.yml` : a pipeline triggered when a **pull request** to the **master** branch is created. It performs linting, data integrity testing and unit testing only.
 
 ### ML Services
 
-- `ml_service/pipelines/build_train_pipeline.py` : builds and publishes an ML training pipeline. It uses Python on ML Compute.
-- `ml_service/pipelines/build_train_pipeline_with_r.py` : builds and publishes an ML training pipeline. It uses R on ML Compute.
-- `ml_service/pipelines/build_train_pipeline_with_r_on_dbricks.py` : builds and publishes an ML training pipeline. It uses R on Databricks Compute.
+- `ml_service/pipelines/diabetes_regression_build_train_pipeline.py` : builds and publishes an ML training pipeline. It uses Python on ML Compute.
+- `ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py` : builds and publishes an ML training pipeline. It uses R on ML Compute.
+- `ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r_on_dbricks.py` : builds and publishes an ML training pipeline. It uses R on Databricks Compute.
 - `ml_service/pipelines/run_train_pipeline.py` : invokes a published ML training pipeline (Python on ML Compute) via REST API.
 - `ml_service/pipelines/verify_train_pipeline.py` : determines whether the evaluate_model.py step of the training pipeline registered a new model.
 - `ml_service/util` : contains common utility functions used to build and publish an ML training pipeline.
 
 ### Code
 
-- `code/training/train.py` : a training step of an ML training pipeline.
-- `code/evaluate/evaluate_model.py` : an evaluating step of an ML training pipeline which registers a new trained model if evaluation shows the new model is more performant than the previous one.
-- `code/evaluate/register_model.py` : (LEGACY) registers a new trained model if evaluation shows the new model is more performant than the previous one.
-- `code/training/R/r_train.r` : training a model with R basing on a sample dataset (weight_data.csv).
-- `code/training/R/train_with_r.py` : a python wrapper (ML Pipeline Step) invoking R training script on ML Compute 
-- `code/training/R/train_with_r_on_databricks.py` : a python wrapper (ML Pipeline Step) invoking R training script on Databricks Compute
-- `code/training/R/weight_data.csv` : a sample dataset used by R script (r_train.r) to train a model
+- `diabetes_regression/training/train.py` : a training step of an ML training pipeline.
+- `diabetes_regression/evaluate/evaluate_model.py` : an evaluating step of an ML training pipeline which registers a new trained model if evaluation shows the new model is more performant than the previous one.
+- `diabetes_regression/register/register_model.py` : (LEGACY) registers a new trained model if evaluation shows the new model is more performant than the previous one.
+- `diabetes_regression/training/R/r_train.r` : training a model with R based on a sample dataset (weight_data.csv).
+- `diabetes_regression/training/R/train_with_r.py` : a python wrapper (ML Pipeline Step) invoking R training script on ML Compute 
+- `diabetes_regression/training/R/train_with_r_on_databricks.py` : a python wrapper (ML Pipeline Step) invoking R training script on Databricks Compute
+- `diabetes_regression/training/R/weight_data.csv` : a sample dataset used by R script (r_train.r) to train a model
 
 ### Scoring
-- code/scoring/score.py : a scoring script which is about to be packed into a Docker Image along with a model while being deployed to QA/Prod environment.
-- code/scoring/conda_dependencies.yml : contains a list of dependencies required by score.py to be installed in a deployable Docker Image 
-- code/scoring/inference_config.yml, deployment_config_aci.yml, deployment_config_aks.yml : configuration files for the [AML Model Deploy](https://marketplace.visualstudio.com/items?itemName=ms-air-aiagility.private-vss-services-azureml&ssr=false#overview) pipeline task for ACI and AKS deployment targets.
-
-
+- `diabetes_regression/scoring/score.py` : a scoring script which is about to be packed into a Docker Image along with a model while being deployed to QA/Prod environment.
+- `diabetes_regression/scoring/conda_dependencies.yml` : contains a list of dependencies required by score.py to be installed in a deployable Docker Image 
+- `diabetes_regression/scoring/inference_config.yml`, `deployment_config_aci.yml`, `deployment_config_aks.yml` : configuration files for the [AML Model Deploy](https://marketplace.visualstudio.com/items?itemName=ms-air-aiagility.private-vss-services-azureml&ssr=false#overview) pipeline task for ACI and AKS deployment targets.
diff --git a/docs/getting_started.md b/docs/getting_started.md
@@ -87,7 +87,7 @@ Please be aware that the local environment also needs access to the Azure subscr
 
 ### Azure DevOps configuration
 
-For using Azure DevOps Pipelines all other variables are stored in the file `.pipelines/azdo-variables.yml`. Using the default values as a starting point, adjust the variables to suit your requirements.
+For using Azure DevOps Pipelines all other variables are stored in the file `.pipelines/diabetes_regression-variables.yml`. Using the default values as a starting point, adjust the variables to suit your requirements.
 
 Up until now you should have:
 
@@ -131,7 +131,7 @@ Install the **Azure Machine Learning** extension to your organization from the
 [marketplace](https://marketplace.visualstudio.com/items?itemName=ms-air-aiagility.vss-services-azureml),
 so that you can set up a service connection to your AML workspace.
 
-Create a service connection to your ML workspace via the [Azure DevOps Azure ML task instructions](https://marketplace.visualstudio.com/items?itemName=ms-air-aiagility.vss-services-azureml) to be able to execute the Azure ML training pipeline. The connection name specified here needs to be used for the value of the `WORKSPACE_SVC_CONNECTION` set in the variable group below.
+Create a service connection to your ML workspace via the [Azure DevOps Azure ML task instructions](https://marketplace.visualstudio.com/items?itemName=ms-air-aiagility.vss-services-azureml) to be able to execute the Azure ML training pipeline. The connection name specified here needs to be used for the value of the `WORKSPACE_SVC_CONNECTION` set in the variable group above.
 
 **Note:** Creating service connection with Azure Machine Learning workspace scope requires 'Owner' or 'User Access Administrator' permissions on the Workspace.
 You must also have sufficient permissions to register an application with
@@ -154,7 +154,7 @@ environments, or alternatively to Azure App Service.
 ### Set up the Pipeline
 
 In your [Azure DevOps](https://dev.azure.com) project create and run a new build
-pipeline referring to the [azdo-ci-build-train.yml](../.pipelines/azdo-ci-build-train.yml)
+pipeline referring to the [diabetes_regression-ci-build-train.yml](../.pipelines/diabetes_regression-ci-build-train.yml)
 pipeline definition in your forked repository:
 
 ![configure ci build pipeline](./images/ci-build-pipeline-configure.png)
@@ -174,7 +174,7 @@ Great, you now have the build pipeline set up which automatically triggers every
 
   **Note:** The build pipeline also supports building and publishing ML
 pipelines using R to train a model. This is enabled
-by changing the `build-train-script` pipeline variable to either `build_train_pipeline_with_r.py`, or `build_train_pipeline_with_r_on_dbricks.py`. For pipeline training a model with R on Databricks you'll need
+by changing the `BUILD_TRAIN_SCRIPT` pipeline variable to either `diabetes_regression_build_train_pipeline_with_r.py`, or `diabetes_regression_build_train_pipeline_with_r_on_dbricks.py`. For a pipeline training a model with R on Databricks you'll need
 to manually create a Databricks cluster and attach it to the ML Workspace as a
 compute (Values DB_CLUSTER_ID and DATABRICKS_COMPUTE_NAME variables should be
 specified).
@@ -189,7 +189,7 @@ Wait until the pipeline finishes and verify that there is a new model in the **M
 
 ![trained model](./images/trained-model.png)
 
-To disable the automatic trigger of the training pipeline, change the `auto-trigger-training` variable as listed in the `.pipelines\azdo-ci-build-train.yml` pipeline to `false`.  This can also be overridden at runtime execution of the pipeline.
+To disable the automatic trigger of the training pipeline, change the `auto-trigger-training` variable as listed in the `.pipelines/diabetes_regression-ci-build-train.yml` pipeline to `false`.  This can also be overridden at runtime execution of the pipeline.
 
 ### Deploy the Model to Azure Kubernetes Service
 

diff --git a/ml_service/pipelines/build_train_pipeline.py → ...abetes_regression_build_train_pipeline.py b/ml_service/pipelines/build_train_pipeline.py → ...abetes_regression_build_train_pipeline.py
diff --git a/.../pipelines/build_train_pipeline_with_r.py → ...regression_build_train_pipeline_with_r.py b/.../pipelines/build_train_pipeline_with_r.py → ...regression_build_train_pipeline_with_r.py
@@ -43,7 +43,7 @@ def main():
         name="Train Model",
         script_name="train_with_r.py",
         compute_target=aml_compute,
-        source_directory="code/training/R",
+        source_directory="diabetes_regression/training/R",
         runconfig=run_config,
         allow_reuse=False,
     )

diff --git a/...build_train_pipeline_with_r_on_dbricks.py → ...build_train_pipeline_with_r_on_dbricks.py b/...build_train_pipeline_with_r_on_dbricks.py → ...build_train_pipeline_with_r_on_dbricks.py
@@ -32,7 +32,7 @@ def main():
         name="DBPythonInLocalMachine",
         num_workers=1,
         python_script_name="train_with_r_on_databricks.py",
-        source_directory="code/training/R",
+        source_directory="diabetes_regression/training/R",
         run_name='DB_Python_R_demo',
         existing_cluster_id=e.db_cluster_id,
         compute_target=aml_compute,

diff --git a/ml_service/pipelines/verify_train_pipeline.py b/ml_service/pipelines/verify_train_pipeline.py
@@ -15,7 +15,7 @@ def main():
         load_dotenv()
         sources_dir = os.environ.get("SOURCES_DIR_TRAIN")
         if (sources_dir is None):
-            sources_dir = 'code'
+            sources_dir = 'diabetes_regression'
         path_to_util = os.path.join(".", sources_dir, "util")
         sys.path.append(os.path.abspath(path_to_util))  # NOQA: E402
         from model_helper import get_model_by_tag

diff --git a/ml_service/util/create_scoring_image.py b/ml_service/util/create_scoring_image.py
@@ -28,7 +28,7 @@
 model = Model(ws, name=e.model_name, version=e.model_version)
 sources_dir = e.sources_directory_train
 if (sources_dir is None):
-    sources_dir = 'code'
+    sources_dir = 'diabetes_regression'
 path_to_scoring = os.path.join(".", sources_dir, "scoring")
 cwd = os.getcwd()
 os.chdir(path_to_scoring)

diff --git a/tests/unit/code_test.py b/tests/unit/code_test.py
@@ -3,7 +3,8 @@
 import numpy as np
 from azureml.core.run import Run
 from unittest.mock import Mock
-sys.path.append(os.path.abspath("./code/training"))  # NOQA: E402
+sys.path.append(os.path.abspath(
+        "./diabetes_regression/training"))  # NOQA: E402
 from train import train_model