From a9834665f48a435ee53585949078724dcfcc8dd0 Mon Sep 17 00:00:00 2001
From: Sergii Baidachnyi <sbaidachni@gmail.com>
Date: Fri, 31 Jan 2020 12:19:09 -0800
Subject: [PATCH 1/9] remove alpha from pipeline parameters

---
 .../diabetes_regression-ci-build-train.yml    | 12 +---------
 diabetes_regression/training/train.json       |  3 +++
 diabetes_regression/training/train.py         | 23 +++++++++++--------
 ...iabetes_regression_build_train_pipeline.py |  3 ---
 4 files changed, 18 insertions(+), 23 deletions(-)
 create mode 100644 diabetes_regression/training/train.json

diff --git a/.pipelines/diabetes_regression-ci-build-train.yml b/.pipelines/diabetes_regression-ci-build-train.yml
index b89eb30c..9d9ed3b1 100644
--- a/.pipelines/diabetes_regression-ci-build-train.yml
+++ b/.pipelines/diabetes_regression-ci-build-train.yml
@@ -62,22 +62,12 @@ stages:
           echo "##vso[task.setvariable variable=AMLPIPELINEID;isOutput=true]$AMLPIPELINEID"
       name: 'getpipelineid'
       displayName: 'Get Pipeline ID'
-    - bash: |
-          # Generate a hyperparameter value as a random number between 0 and 1.
-          # A random value is used here to make the Azure ML dashboards "interesting" when testing
-          # the solution sample.
-          alpha=$(printf "0.%03d\n" $((($RANDOM*1000)/32767)))
-          echo "Alpha: $alpha"
-          echo "##vso[task.setvariable variable=ALPHA;isOutput=true]$alpha"
-      name: 'getalpha'
-      displayName: 'Generate random value for hyperparameter alpha'
   - job: "Run_ML_Pipeline"
     dependsOn: "Get_Pipeline_ID"
     displayName: "Trigger ML Training Pipeline"
     pool: server
     variables:
       AMLPIPELINE_ID: $[ dependencies.Get_Pipeline_ID.outputs['getpipelineid.AMLPIPELINEID'] ]
-      ALPHA: $[ dependencies.Get_Pipeline_ID.outputs['getalpha.ALPHA'] ]
     steps:
     - task: ms-air-aiagility.vss-services-azureml.azureml-restApi-task.MLPublishedPipelineRestAPITask@0
       displayName: 'Invoke ML pipeline'
@@ -85,7 +75,7 @@ stages:
         azureSubscription: '$(WORKSPACE_SVC_CONNECTION)'
         PipelineId: '$(AMLPIPELINE_ID)'
         ExperimentName: '$(EXPERIMENT_NAME)'
-        PipelineParameters: '"ParameterAssignments": {"model_name": "$(MODEL_NAME)", "hyperparameter_alpha": "$(ALPHA)"}'
+        PipelineParameters: '"ParameterAssignments": {"model_name": "$(MODEL_NAME)"}'
   - job: "Training_Run_Report"
     dependsOn: "Run_ML_Pipeline"
     condition: always()
diff --git a/diabetes_regression/training/train.json b/diabetes_regression/training/train.json
new file mode 100644
index 00000000..3158cd1b
--- /dev/null
+++ b/diabetes_regression/training/train.json
@@ -0,0 +1,3 @@
+{
+    "alpha": 0.4
+}
diff --git a/diabetes_regression/training/train.py b/diabetes_regression/training/train.py
index f56daa99..67d6059b 100644
--- a/diabetes_regression/training/train.py
+++ b/diabetes_regression/training/train.py
@@ -32,6 +32,7 @@
 from sklearn.metrics import mean_squared_error
 from sklearn.model_selection import train_test_split
 from sklearn.externals import joblib
+import json
 
 
 def train_model(run, data, alpha):
@@ -46,6 +47,10 @@ def train_model(run, data, alpha):
         preds, data["test"]["y"]), description="Mean squared error metric")
     return reg
 
+def get_model_parameters():
+    with open("train.json") as f:
+        data=json.load(f)
+    return data
 
 def main():
     print("Running train.py")
@@ -62,13 +67,6 @@ def main():
         help="Name of the Model",
         default="sklearn_regression_model.pkl",
     )
-    parser.add_argument(
-        "--alpha",
-        type=float,
-        default=0.5,
-        help=("Ridge regression regularization strength hyperparameter; "
-              "must be a positive float.")
-    )
 
     parser.add_argument(
         "--dataset_name",
@@ -79,14 +77,21 @@ def main():
 
     print("Argument [build_id]: %s" % args.build_id)
     print("Argument [model_name]: %s" % args.model_name)
-    print("Argument [alpha]: %s" % args.alpha)
     print("Argument [dataset_name]: %s" % args.dataset_name)
 
     model_name = args.model_name
     build_id = args.build_id
-    alpha = args.alpha
     dataset_name = args.dataset_name
 
+    print("Getting training parameters")
+
+    pars = get_model_parameters()
+    alpha = pars.get("alpha")
+    if alpha is None:
+        alpha = 0.5
+
+    print("Parameter alpha: %s" % alpha)
+
     run = Run.get_context()
     ws = run.experiment.workspace
 
diff --git a/ml_service/pipelines/diabetes_regression_build_train_pipeline.py b/ml_service/pipelines/diabetes_regression_build_train_pipeline.py
index 66913420..3676b2d6 100644
--- a/ml_service/pipelines/diabetes_regression_build_train_pipeline.py
+++ b/ml_service/pipelines/diabetes_regression_build_train_pipeline.py
@@ -48,8 +48,6 @@ def main():
         name="model_name", default_value=e.model_name)
     build_id_param = PipelineParameter(
         name="build_id", default_value=e.build_id)
-    hyperparameter_alpha_param = PipelineParameter(
-        name="hyperparameter_alpha", default_value=0.5)
 
     dataset_name = ""
     if (e.datastore_name is not None and e.datafile_name is not None):
@@ -70,7 +68,6 @@ def main():
         arguments=[
             "--build_id", build_id_param,
             "--model_name", model_name_param,
-            "--alpha", hyperparameter_alpha_param,
             "--dataset_name", dataset_name,
         ],
         runconfig=run_config,

From aef04872111323f02295b4d867cebd71f214f297 Mon Sep 17 00:00:00 2001
From: Sergii Baidachnyi <sbaidachni@gmail.com>
Date: Fri, 31 Jan 2020 12:24:46 -0800
Subject: [PATCH 2/9] linting

---
 diabetes_regression/training/train.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/diabetes_regression/training/train.py b/diabetes_regression/training/train.py
index 67d6059b..a6fa1d6a 100644
--- a/diabetes_regression/training/train.py
+++ b/diabetes_regression/training/train.py
@@ -47,11 +47,13 @@ def train_model(run, data, alpha):
         preds, data["test"]["y"]), description="Mean squared error metric")
     return reg
 
+
 def get_model_parameters():
     with open("train.json") as f:
-        data=json.load(f)
+        data = json.load(f)
     return data
 
+
 def main():
     print("Running train.py")
 

From 6d1388108beea0ad99f5c8d443edd0b61129da00 Mon Sep 17 00:00:00 2001
From: Sergii Baidachnyi <sbaidachni@gmail.com>
Date: Fri, 31 Jan 2020 18:28:18 -0800
Subject: [PATCH 3/9] update config

---
 diabetes_regression/config.json         | 6 ++++++
 diabetes_regression/training/train.json | 3 ---
 diabetes_regression/training/train.py   | 9 +++++----
 3 files changed, 11 insertions(+), 7 deletions(-)
 create mode 100644 diabetes_regression/config.json
 delete mode 100644 diabetes_regression/training/train.json

diff --git a/diabetes_regression/config.json b/diabetes_regression/config.json
new file mode 100644
index 00000000..a7b4bc1c
--- /dev/null
+++ b/diabetes_regression/config.json
@@ -0,0 +1,6 @@
+{
+    "training":
+    {
+        "alpha": 0.4
+    }
+}
diff --git a/diabetes_regression/training/train.json b/diabetes_regression/training/train.json
deleted file mode 100644
index 3158cd1b..00000000
--- a/diabetes_regression/training/train.json
+++ /dev/null
@@ -1,3 +0,0 @@
-{
-    "alpha": 0.4
-}
diff --git a/diabetes_regression/training/train.py b/diabetes_regression/training/train.py
index a6fa1d6a..a2953ceb 100644
--- a/diabetes_regression/training/train.py
+++ b/diabetes_regression/training/train.py
@@ -49,7 +49,7 @@ def train_model(run, data, alpha):
 
 
 def get_model_parameters():
-    with open("train.json") as f:
+    with open("../config.json") as f:
         data = json.load(f)
     return data
 
@@ -88,9 +88,10 @@ def main():
     print("Getting training parameters")
 
     pars = get_model_parameters()
-    alpha = pars.get("alpha")
-    if alpha is None:
-        alpha = 0.5
+    try:
+        alpha=pars["training"]["alpha"]
+    except KeyError:
+        alpha=0.5
 
     print("Parameter alpha: %s" % alpha)
 

From 00e11b52b82af3c5f4ce0bc114c7b5589d0d9e33 Mon Sep 17 00:00:00 2001
From: Sergii Baidachnyi <sbaidachni@gmail.com>
Date: Fri, 31 Jan 2020 18:37:59 -0800
Subject: [PATCH 4/9] linting

---
 diabetes_regression/training/train.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/diabetes_regression/training/train.py b/diabetes_regression/training/train.py
index a2953ceb..b2c0196c 100644
--- a/diabetes_regression/training/train.py
+++ b/diabetes_regression/training/train.py
@@ -89,9 +89,9 @@ def main():
 
     pars = get_model_parameters()
     try:
-        alpha=pars["training"]["alpha"]
+        alpha = pars["training"]["alpha"]
     except KeyError:
-        alpha=0.5
+        alpha = 0.5
 
     print("Parameter alpha: %s" % alpha)
 

From 82743b256ce34c4d339fde548f2f48b6ea4ef577 Mon Sep 17 00:00:00 2001
From: Sergii Baidachnyi <sbaidachni@gmail.com>
Date: Fri, 31 Jan 2020 18:53:53 -0800
Subject: [PATCH 5/9] move get_model_parameters to helper

---
 diabetes_regression/config.json          |  8 ++++++++
 diabetes_regression/training/train.py    |  8 +-------
 diabetes_regression/util/model_helper.py | 15 +++++++++++++++
 3 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/diabetes_regression/config.json b/diabetes_regression/config.json
index a7b4bc1c..859fd84d 100644
--- a/diabetes_regression/config.json
+++ b/diabetes_regression/config.json
@@ -2,5 +2,13 @@
     "training":
     {
         "alpha": 0.4
+    },
+    "evaluation":
+    {
+
+    },
+    "scoring":
+    {
+        
     }
 }
diff --git a/diabetes_regression/training/train.py b/diabetes_regression/training/train.py
index b2c0196c..f37f5a2a 100644
--- a/diabetes_regression/training/train.py
+++ b/diabetes_regression/training/train.py
@@ -32,7 +32,7 @@
 from sklearn.metrics import mean_squared_error
 from sklearn.model_selection import train_test_split
 from sklearn.externals import joblib
-import json
+from util.model_helper import get_model_parameters
 
 
 def train_model(run, data, alpha):
@@ -48,12 +48,6 @@ def train_model(run, data, alpha):
     return reg
 
 
-def get_model_parameters():
-    with open("../config.json") as f:
-        data = json.load(f)
-    return data
-
-
 def main():
     print("Running train.py")
 
diff --git a/diabetes_regression/util/model_helper.py b/diabetes_regression/util/model_helper.py
index 98df0bb8..5bd8d923 100644
--- a/diabetes_regression/util/model_helper.py
+++ b/diabetes_regression/util/model_helper.py
@@ -4,6 +4,21 @@
 from azureml.core import Run
 from azureml.core import Workspace
 from azureml.core.model import Model as AMLModel
+import json
+
+
+def get_model_parameters():
+    """
+    Getting parameters from config.json
+    Parameters:
+    None
+
+    Return:
+    a dictionary from config.json
+    """
+    with open("../config.json") as f:
+        data = json.load(f)
+    return data
 
 
 def get_current_workspace() -> Workspace:

From 96ad87af7692b4b11c9aa8b4cfd8a87a939fecda Mon Sep 17 00:00:00 2001
From: Sergii Baidachnyi <sbaidachni@gmail.com>
Date: Fri, 31 Jan 2020 19:02:29 -0800
Subject: [PATCH 6/9] cannot move to util due to unit tests

---
 diabetes_regression/training/train.py    |  5 +++--
 diabetes_regression/util/model_helper.py | 15 ---------------
 2 files changed, 3 insertions(+), 17 deletions(-)

diff --git a/diabetes_regression/training/train.py b/diabetes_regression/training/train.py
index f37f5a2a..c25fa61f 100644
--- a/diabetes_regression/training/train.py
+++ b/diabetes_regression/training/train.py
@@ -32,7 +32,7 @@
 from sklearn.metrics import mean_squared_error
 from sklearn.model_selection import train_test_split
 from sklearn.externals import joblib
-from util.model_helper import get_model_parameters
+import json
 
 
 def train_model(run, data, alpha):
@@ -81,7 +81,8 @@ def main():
 
     print("Getting training parameters")
 
-    pars = get_model_parameters()
+    with open("../config.json") as f:
+        pars = json.load(f)
     try:
         alpha = pars["training"]["alpha"]
     except KeyError:
diff --git a/diabetes_regression/util/model_helper.py b/diabetes_regression/util/model_helper.py
index 5bd8d923..98df0bb8 100644
--- a/diabetes_regression/util/model_helper.py
+++ b/diabetes_regression/util/model_helper.py
@@ -4,21 +4,6 @@
 from azureml.core import Run
 from azureml.core import Workspace
 from azureml.core.model import Model as AMLModel
-import json
-
-
-def get_model_parameters():
-    """
-    Getting parameters from config.json
-    Parameters:
-    None
-
-    Return:
-    a dictionary from config.json
-    """
-    with open("../config.json") as f:
-        data = json.load(f)
-    return data
 
 
 def get_current_workspace() -> Workspace:

From 98a7d732409d124b313fc15b61095d2a67e7ca1a Mon Sep 17 00:00:00 2001
From: Sergii Baidachnyi <sbaidachni@gmail.com>
Date: Fri, 31 Jan 2020 19:26:08 -0800
Subject: [PATCH 7/9] documentation

---
 docs/getting_started.md | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/docs/getting_started.md b/docs/getting_started.md
index 8b3167e4..cebe1b2c 100644
--- a/docs/getting_started.md
+++ b/docs/getting_started.md
@@ -86,6 +86,8 @@ For instructions on how to set up a local development environment, refer to the
 
 For using Azure DevOps Pipelines all other variables are stored in the file `.pipelines/diabetes_regression-variables.yml`. Using the default values as a starting point, adjust the variables to suit your requirements.
 
+**Note:** In `diabetes_regression` folder you can find `config.json` file that we would recommend to use in order to provide parameters for training, evaluation and scoring scripts. An example of a such parameter is a hyperparameter of a training algorithm: in our case it's the ridge regression [*alpha* hyperparameter](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Ridge.html).
+
 Up until now you should have:
 
 * Forked (or cloned) the repo
@@ -120,7 +122,7 @@ Check out the newly created resources in the [Azure Portal](portal.azure.com):
 (Optional) To remove the resources created for this project you can use the [/environment_setup/iac-remove-environment.yml](../environment_setup/iac-remove-environment.yml) definition or you can just delete the resource group in the [Azure Portal](portal.azure.com).
 
 **Note:** The training ML pipeline uses a [sample diabetes dataset](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html) as training data. If you want to use your own dataset, you need to [create and register a datastore](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-access-data#azure-machine-learning-studio) in your ML workspace and upload the datafile (e.g. [diabetes.csv](./data/diabetes.csv)) to the corresponding blob container. You can also define a datastore in the ML Workspace with [az cli](https://docs.microsoft.com/en-us/cli/azure/ext/azure-cli-ml/ml/datastore?view=azure-cli-latest#ext-azure-cli-ml-az-ml-datastore-attach-blob). 
-You'll also need to configure DATASTORE_NAME and DATAFILE_NAME variables in ***devopsforai-aml-vg*** variable group. 
+You'll also need to configure DATASTORE_NAME and DATAFILE_NAME variables in ***devopsforai-aml-vg*** variable group.
 
 
 ## Create an Azure DevOps Azure ML Workspace Service Connection
@@ -187,7 +189,7 @@ specified).
 **Note:** If the model evaluation determines that the new model does not perform better than the previous one then the new model will not be registered and the pipeline will be cancelled.
 
 * The third stage of the pipeline, **Deploy to ACI**, deploys the model to the QA environment in [Azure Container Instances](https://azure.microsoft.com/en-us/services/container-instances/). It then runs a *smoke test* to validate the deployment, i.e. sends a sample query to the scoring web service and verifies that it returns a response in the expected format.
- 
+
 Wait until the pipeline finishes and verify that there is a new model in the **ML Workspace**:
 
 ![trained model](./images/trained-model.png)
@@ -247,7 +249,6 @@ Make sure your webapp has the credentials to pull the image from the Azure Conta
 
 * The provided pipeline definition YAML file is a sample starting point, which you should tailor to your processes and environment.
 * You should edit the pipeline definition to remove unused stages. For example, if you are deploying to ACI and AKS, you should delete the unused `Deploy_Webapp` stage.
-* The sample pipeline generates a random value for a model hyperparameter (ridge regression [*alpha*](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Ridge.html)) to generate 'interesting' charts when testing the sample. In a real application you should use fixed hyperparameter values. You can [tune hyperparameter values using Azure ML](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-tune-hyperparameters), and manage their values in Azure DevOps Variable Groups.
 * You may wish to enable [manual approvals](https://docs.microsoft.com/en-us/azure/devops/pipelines/process/approvals) before the deployment stages.
 * You can install additional Conda or pip packages by modifying the YAML environment configurations under the `diabetes_regression` directory. Make sure to use fixed version numbers for all packages to ensure reproducibility, and use the same versions across environments.
 * You can explore aspects of model observability in the solution, such as:

From e76f89de07932deeb2d1c70eae7d1bdeeb224812 Mon Sep 17 00:00:00 2001
From: Sergii Baidachnyi <sbaidachni@gmail.com>
Date: Fri, 31 Jan 2020 19:28:12 -0800
Subject: [PATCH 8/9] more doc

---
 docs/getting_started.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/getting_started.md b/docs/getting_started.md
index cebe1b2c..1d75bc05 100644
--- a/docs/getting_started.md
+++ b/docs/getting_started.md
@@ -86,7 +86,7 @@ For instructions on how to set up a local development environment, refer to the
 
 For using Azure DevOps Pipelines all other variables are stored in the file `.pipelines/diabetes_regression-variables.yml`. Using the default values as a starting point, adjust the variables to suit your requirements.
 
-**Note:** In `diabetes_regression` folder you can find `config.json` file that we would recommend to use in order to provide parameters for training, evaluation and scoring scripts. An example of a such parameter is a hyperparameter of a training algorithm: in our case it's the ridge regression [*alpha* hyperparameter](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Ridge.html).
+**Note:** In the `diabetes_regression` folder you can find a `config.json` file that we recommend using to provide parameters for the training, evaluation and scoring scripts. An example of such a parameter is a hyperparameter of the training algorithm: in our case it's the ridge regression [*alpha* hyperparameter](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Ridge.html). We don't provide any special serializers for this config file, so it's up to you which template to support there.
 
 Up until now you should have:
 

From f8af1971ce7af5b7ed9a51ea7d76103192cb4d72 Mon Sep 17 00:00:00 2001
From: Sergii Baidachnyi <sbaidachni@gmail.com>
Date: Fri, 31 Jan 2020 19:52:54 -0800
Subject: [PATCH 9/9] fix config.json path: train.py executes from the diabetes_regression root

---
 diabetes_regression/training/train.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/diabetes_regression/training/train.py b/diabetes_regression/training/train.py
index c25fa61f..fcec4f65 100644
--- a/diabetes_regression/training/train.py
+++ b/diabetes_regression/training/train.py
@@ -81,7 +81,7 @@ def main():
 
     print("Getting training parameters")
 
-    with open("../config.json") as f:
+    with open("config.json") as f:
         pars = json.load(f)
     try:
         alpha = pars["training"]["alpha"]