From 882f1df3a1beac25a87d2f89bfdfe8828255dcf3 Mon Sep 17 00:00:00 2001
From: chicm-ms
Date: Thu, 19 Mar 2020 13:28:52 +0800
Subject: [PATCH] Add dict metrics test

---
 test/config/integration_tests.yml             | 22 +++++++--
 .../{metrics.test.yml => config.yml}          |  0
 ...win32.test.yml => config_dict_metrics.yml} |  2 +-
 .../metrics_test/expected_metrics_dict.json   | 11 +++++
 test/config/metrics_test/trial.py             | 28 ++++++++----
 test/nni_test/nnitest/run_tests.py            | 45 ++++++++++++-------
 test/nni_test/nnitest/validators.py           | 22 +++++----
 test/scripts/model_compression.sh             |  5 +--
 8 files changed, 94 insertions(+), 41 deletions(-)
 rename test/config/metrics_test/{metrics.test.yml => config.yml} (100%)
 rename test/config/metrics_test/{metrics_win32.test.yml => config_dict_metrics.yml} (87%)
 create mode 100644 test/config/metrics_test/expected_metrics_dict.json

diff --git a/test/config/integration_tests.yml b/test/config/integration_tests.yml
index 7e0e80ce60..31359b1e84 100644
--- a/test/config/integration_tests.yml
+++ b/test/config/integration_tests.yml
@@ -66,12 +66,25 @@ testCases:
 #########################################################################
 # nni features test
 #########################################################################
-- name: metrics_test
-  configFile: test/config/metrics_test/metrics.test.yml
+- name: metrics_float
+  configFile: test/config/metrics_test/config.yml
   config:
     maxTrialNum: 1
     trialConcurrency: 1
-  validator: MetricsValidator
+  validator:
+    class: MetricsValidator
+    kwargs:
+      expected_result_file: expected_metrics.json
+
+- name: metrics_dict
+  configFile: test/config/metrics_test/config_dict_metrics.yml
+  config:
+    maxTrialNum: 1
+    trialConcurrency: 1
+  validator:
+    class: MetricsValidator
+    kwargs:
+      expected_result_file: expected_metrics_dict.json
 
 - name: nnicli
   configFile: test/config/examples/sklearn-regression.yml
@@ -80,7 +93,8 @@
     trialConcurrency: 4
   launchCommand: python3 -c 'import nnicli as nc; nc.start_nni("$configFile")'
   stopCommand: python3 -c 'import nnicli as nc; nc.stop_nni()'
-  validator: NnicliValidator
+  validator:
+    class: NnicliValidator
 
 # Experiment resume test part 1
 - name: nnictl-resume-1
diff --git a/test/config/metrics_test/metrics.test.yml b/test/config/metrics_test/config.yml
similarity index 100%
rename from test/config/metrics_test/metrics.test.yml
rename to test/config/metrics_test/config.yml
diff --git a/test/config/metrics_test/metrics_win32.test.yml b/test/config/metrics_test/config_dict_metrics.yml
similarity index 87%
rename from test/config/metrics_test/metrics_win32.test.yml
rename to test/config/metrics_test/config_dict_metrics.yml
index 39dfb662e8..286363dffe 100644
--- a/test/config/metrics_test/metrics_win32.test.yml
+++ b/test/config/metrics_test/config_dict_metrics.yml
@@ -10,7 +10,7 @@ tuner:
 trial:
   codeDir: .
-  command: python trial.py
+  command: python3 trial.py --dict_metrics
   gpuNum: 0
 useAnnotation: false
diff --git a/test/config/metrics_test/expected_metrics_dict.json b/test/config/metrics_test/expected_metrics_dict.json
new file mode 100644
index 0000000000..c3d57f88af
--- /dev/null
+++ b/test/config/metrics_test/expected_metrics_dict.json
@@ -0,0 +1,11 @@
+{
+    "intermediate_result": [
+        {"default": 0.1, "loss": 0.11, "other": 0.111},
+        {"default": 0.2, "loss": 0.22, "other": 0.222},
+        {"default": 0.3, "loss": 0.33, "other": 0.333},
+        {"default": 0.4, "loss": 0.44, "other": 0.444},
+        {"default": 0.5, "loss": 0.55, "other": 0.555}
+
+    ],
+    "final_result": {"default": 0.6, "loss": 0.66, "other": 0.666}
+}
diff --git a/test/config/metrics_test/trial.py b/test/config/metrics_test/trial.py
index d9a61372ab..5c12afef44 100644
--- a/test/config/metrics_test/trial.py
+++ b/test/config/metrics_test/trial.py
@@ -2,18 +2,28 @@
 # Licensed under the MIT license.
 
 import time
+import json
+import argparse
 import nni
 
 if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--dict_metrics", action='store_true')
+    args = parser.parse_args()
+
+    if args.dict_metrics:
+        result_file = 'expected_metrics_dict.json'
+    else:
+        result_file = 'expected_metrics.json'
+
     nni.get_next_parameter()
+    with open(result_file, 'r') as f:
+        m = json.load(f)
+    for v in m['intermediate_result']:
+        time.sleep(1)
+        print('report_intermediate_result:', v)
+        nni.report_intermediate_result(v)
     time.sleep(1)
-    for i in range(10):
-        if i % 2 == 0:
-            print('report intermediate result without end of line.', end='')
-        else:
-            print('report intermediate result.')
-        nni.report_intermediate_result(0.1*(i+1))
-        time.sleep(2)
-    print('test final metrics not at line start.', end='')
-    nni.report_final_result(1.0)
+    print('report_final_result:', m['final_result'])
+    nni.report_final_result(m['final_result'])
     print('done')
diff --git a/test/nni_test/nnitest/run_tests.py b/test/nni_test/nnitest/run_tests.py
index da845414fa..56055f7653 100644
--- a/test/nni_test/nnitest/run_tests.py
+++ b/test/nni_test/nnitest/run_tests.py
@@ -37,19 +37,10 @@ def update_training_service_config(config, training_service):
     deep_update(config, it_ts_config['all'])
     deep_update(config, it_ts_config[training_service])
 
-def run_test_case(test_case_config, it_config, args):
-    # fill test case default config
-    for k in it_config['defaultTestCaseConfig']:
-        if k not in test_case_config:
-            test_case_config[k] = it_config['defaultTestCaseConfig'][k]
-    print(json.dumps(test_case_config, indent=4))
-
+def prepare_config_file(test_case_config, it_config, args):
     config_path = os.path.join(args.nni_source_dir, test_case_config['configFile'])
     test_yml_config = get_yml_content(config_path)
 
-    # apply training service config
-    update_training_service_config(test_yml_config, args.ts)
-
     # apply test case specific config
     if test_case_config.get('config') is not None:
         deep_update(test_yml_config, test_case_config['config'])
@@ -58,21 +49,32 @@ def run_test_case(test_case_config, it_config, args):
     if sys.platform == 'win32':
         test_yml_config['trial']['command'] = test_yml_config['trial']['command'].replace('python3', 'python')
 
+    # apply training service config
+    # the user's gpuNum and logCollection settings are overwritten by the config in training_service.yml
+    # the kubeflow hack has to be applied as the last step
+    update_training_service_config(test_yml_config, args.ts)
+
     # generate temporary config yml file to launch experiment
     new_config_file = config_path + '.tmp'
     dump_yml_content(new_config_file, test_yml_config)
     print(yaml.dump(test_yml_config, default_flow_style=False))
 
+    return new_config_file
+
+def run_test_case(test_case_config, it_config, args):
+    # fill test case default config
+    for k in it_config['defaultTestCaseConfig']:
+        if k not in test_case_config:
+            test_case_config[k] = it_config['defaultTestCaseConfig'][k]
+    print(json.dumps(test_case_config, indent=4))
+
+    new_config_file = prepare_config_file(test_case_config, it_config, args)
     # set configFile variable
     it_variables['$configFile'] = new_config_file
 
     try:
         launch_test(new_config_file, args.ts, test_case_config)
-
-        validator_name = test_case_config.get('validator')
-        if validator_name is not None:
-            validator = validators.__dict__[validator_name]()
-            validator(REST_ENDPOINT, None, args.nni_source_dir)
+        invoke_validator(test_case_config, args.nni_source_dir)
     finally:
         print('Stop command:', test_case_config.get('stopCommand'))
         if test_case_config.get('stopCommand'):
@@ -81,6 +83,16 @@ def run_test_case(test_case_config, it_config, args):
         if os.path.exists(new_config_file):
             os.remove(new_config_file)
 
+def invoke_validator(test_case_config, nni_source_dir):
+    validator_config = test_case_config.get('validator')
+    if validator_config is None or validator_config.get('class') is None:
+        return
+
+    validator = validators.__dict__[validator_config.get('class')]()
+    kwargs = validator_config.get('kwargs', {})
+    print('kwargs:', kwargs)
+    validator(REST_ENDPOINT, None, nni_source_dir, **kwargs)
+
 def get_max_values(config_file):
     '''Get maxExecDuration and maxTrialNum of experiment'''
     experiment_config = get_yml_content(config_file)
@@ -152,7 +164,8 @@ def run(args):
 
         begin_time = time.time()
         run_test_case(test_case_config, it_config, args)
-        print(GREEN + 'Test %s: TEST PASS IN %d mins' % (name, (time.time() - begin_time)/60) + CLEAR)
+        print('{}Test {}: TEST PASS IN {} SECONDS{}'.format(GREEN, name, int(time.time()-begin_time), CLEAR), flush=True)
+
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
diff --git a/test/nni_test/nnitest/validators.py b/test/nni_test/nnitest/validators.py
index 1032720747..2fc43abe89 100644
--- a/test/nni_test/nnitest/validators.py
+++ b/test/nni_test/nnitest/validators.py
@@ -9,19 +9,21 @@
 
 class ITValidator:
-    def __call__(self, rest_endpoint, experiment_dir, nni_source_dir):
+    def __call__(self, rest_endpoint, experiment_dir, nni_source_dir, **kwargs):
         pass
 
 class MetricsValidator(ITValidator):
-    def __call__(self, rest_endpoint, experiment_dir, nni_source_dir):
-        self.check_metrics(nni_source_dir)
+    def __call__(self, rest_endpoint, experiment_dir, nni_source_dir, **kwargs):
+        self.check_metrics(nni_source_dir, **kwargs)
 
-    def check_metrics(self, nni_source_dir):
-        with open(osp.join(nni_source_dir, 'test', 'config', 'metrics_test', 'expected_metrics.json'), 'r') as f:
+    def check_metrics(self, nni_source_dir, **kwargs):
+        expected_result_file = kwargs.get('expected_result_file', 'expected_metrics.json')
+        with open(osp.join(nni_source_dir, 'test', 'config', 'metrics_test', expected_result_file), 'r') as f:
             expected_metrics = json.load(f)
             print('expected metrics:', expected_metrics)
             metrics = requests.get(METRICS_URL).json()
+            print('RAW METRICS:', json.dumps(metrics, indent=4))
             intermediate_result, final_result = self.get_metric_results(metrics)
 
             assert intermediate_result and final_result
@@ -32,13 +34,17 @@ def check_metrics(self, nni_source_dir, **kwargs):
             print('final result:', trial_final_result)
             assert len(trial_final_result) == 1, 'there should be 1 final result'
             assert trial_final_result[0] == expected_metrics['final_result']
-            assert set(trial_intermediate_result) == set(expected_metrics['intermediate_result'])
+            # encode dicts/numbers as json strings so they can be compared in a set
+            assert set([json.dumps(x) for x in trial_intermediate_result]) \
+                == set([json.dumps(x) for x in expected_metrics['intermediate_result']])
 
     def get_metric_results(self, metrics):
         intermediate_result = {}
         final_result = {}
         for metric in metrics:
-            metric_value = round(float(json.loads(metric['data'])), 2)
+            # metric values are encoded by the NNI SDK as json strings,
+            # so we decode each value by calling json.loads twice
+            metric_value = json.loads(json.loads(metric['data']))
             if metric['type'] == 'PERIODICAL':
                 if metric['trialJobId'] in intermediate_result:
                     intermediate_result[metric['trialJobId']].append(metric_value)
@@ -52,7 +58,7 @@ def get_metric_results(self, metrics):
         return intermediate_result, final_result
 
 class NnicliValidator(ITValidator):
-    def __call__(self, rest_endpoint, experiment_dir, nni_source_dir):
+    def __call__(self, rest_endpoint, experiment_dir, nni_source_dir, **kwargs):
         print(rest_endpoint)
         nc.set_endpoint(rest_endpoint)
         #print(nc.version())
diff --git a/test/scripts/model_compression.sh b/test/scripts/model_compression.sh
index ade720f86c..b1051f69e4 100644
--- a/test/scripts/model_compression.sh
+++ b/test/scripts/model_compression.sh
@@ -27,12 +27,11 @@ do
     python3 model_prune_torch.py --pruner_name $name --pretrain_epochs 1 --prune_epochs 1
 done
 
-echo "testing lottery ticket pruning..."
-python3 lottery_torch_mnist_fc.py
+#echo "testing lottery ticket pruning..."
+#python3 lottery_torch_mnist_fc.py
 
 echo ""
 echo "===========================Testing: quantizers==========================="
-cd ${CWD}/../examples/model_compress
 echo "testing QAT quantizer..."
 python3 QAT_torch_quantizer.py