From d28d719b0fd1790d2934b41ee07ea77bf031e8c3 Mon Sep 17 00:00:00 2001 From: chicm-ms <38930155+chicm-ms@users.noreply.github.com> Date: Wed, 18 Mar 2020 12:01:46 +0800 Subject: [PATCH 01/38] Refactor integration tests (#2165) --- .../examples/cifar10-pytorch.yml} | 0 .../examples/cifar10_search_space.json | 0 .../examples/mnist-annotation.yml} | 2 +- .../examples/mnist-keras.yml} | 0 .../examples/mnist-nested-search-space.yml} | 2 +- .../examples/mnist-pytorch.yml} | 2 +- .../examples/mnist-tfv1.yml} | 2 +- .../examples/mnist_pytorch_search_space.json | 0 .../examples/mnist_search_space.json | 0 .../examples/sklearn-classification.yml} | 0 .../examples/sklearn-regression.yml} | 0 test/config/integration_tests.yml | 144 ++++++++++++++++ .../metrics_test/expected_metrics.json | 0 .../metrics_test/metrics.test.yml | 0 .../metrics_test/metrics_win32.test.yml | 0 .../metrics_test}/search_space.json | 0 test/{ => config}/metrics_test/trial.py | 0 .../multi_phase/batch.yml} | 0 .../multi_phase/evolution.yml} | 0 .../multi_phase/grid.yml} | 0 .../multi_phase/metis.yml} | 2 +- .../multi_phase/multi_phase.py | 0 .../multi_phase}/search_space.json | 0 .../multi_phase/tpe.yml} | 2 +- .../multi_thread/config.yml} | 0 .../multi_thread/multi_thread_trial.py | 0 .../multi_thread/multi_thread_tuner.py | 0 .../multi_thread}/search_space.json | 0 test/{ => config}/naive_test/README.md | 0 test/{ => config}/naive_test/README_zh_CN.md | 0 .../naive_test/expected_assessor_result.txt | 0 .../naive_test/expected_tuner_result.txt | 0 test/{ => config}/naive_test/local.yml | 0 test/{ => config}/naive_test/local_win32.yml | 0 .../{ => config}/naive_test/naive_assessor.py | 0 test/{ => config}/naive_test/naive_trial.py | 0 test/{ => config}/naive_test/naive_tuner.py | 0 .../{ => config}/naive_test/search_space.json | 0 test/{ => config}/training_service.yml | 0 .../tuner_test/batchtuner_search_space.json | 0 test/{ => config}/tuner_test/local.yml | 0 test/{ => 
config}/tuner_test/local_win32.yml | 0 test/{ => config}/tuner_test/naive_trial.py | 0 .../{ => config}/tuner_test/search_space.json | 0 .../tuners/mnist-annotation-anneal.yml} | 0 .../tuners/mnist-annotation-evolution.yml} | 0 .../tuners/mnist-annotation-random.yml} | 0 .../tuners/mnist-annotation-smac.yml} | 0 .../tuners/mnist-annotation-tpe.yml} | 0 .../tuners/mnist-batchtuner.yml} | 0 .../tuners/mnist-bohb.yml} | 0 .../tuners/mnist-curvefitting.yml} | 0 .../tuners/mnist-gp.yml} | 0 .../tuners/mnist-gridsearch.yml} | 0 .../tuners/mnist-hyperband.yml} | 0 .../tuners/mnist-metis.yml} | 0 test/config/tuners/mnist-metis.yml.tmp | 23 +++ .../tuners/search_space.json | 0 .../tuners/search_space_advisor.json | 0 .../tuners/search_space_batchtuner.json | 0 .../multi_phase/multi_phase.test.yml | 22 --- test/nni_test/nnitest/__init__.py | 0 test/{ => nni_test/nnitest}/cli_test.py | 0 test/{ => nni_test/nnitest}/config_test.py | 4 +- .../nnitest}/generate_ts_config.py | 3 +- test/{ => nni_test/nnitest}/metrics_test.py | 0 test/{ => nni_test/nnitest}/naive_test.py | 33 ++-- test/{ => nni_test/nnitest}/remote_docker.py | 0 test/nni_test/nnitest/run_tests.py | 161 ++++++++++++++++++ test/{ => nni_test/nnitest}/tuner_test.py | 0 test/{ => nni_test/nnitest}/utils.py | 31 +++- test/nni_test/nnitest/validators.py | 52 ++++++ test/nni_test/setup.py | 18 ++ .../pipelines-it-frameworkcontroller.yml | 0 .../pipelines-it-installation.yml | 0 .../{ => pipelines}/pipelines-it-kubeflow.yml | 0 .../pipelines-it-local-windows.yml | 0 test/{ => pipelines}/pipelines-it-local.yml | 12 +- .../pipelines-it-pai-windows.yml | 0 test/{ => pipelines}/pipelines-it-pai.yml | 0 test/{ => pipelines}/pipelines-it-paiYarn.yml | 0 .../pipelines-it-remote-windows.yml | 0 test/{ => pipelines}/pipelines-it-remote.yml | 0 test/{ => scripts}/it.sh | 0 test/{ => scripts}/unittest.ps1 | 0 test/{ => scripts}/unittest.sh | 0 86 files changed, 457 insertions(+), 58 deletions(-) rename 
test/{config_test/examples/cifar10-pytorch.test.yml => config/examples/cifar10-pytorch.yml} (100%) rename test/{config_test => config}/examples/cifar10_search_space.json (100%) rename test/{config_test/examples/mnist-annotation.test.yml => config/examples/mnist-annotation.yml} (89%) rename test/{config_test/examples/mnist-keras.test.yml => config/examples/mnist-keras.yml} (100%) rename test/{config_test/examples/mnist-nested-search-space.test.yml => config/examples/mnist-nested-search-space.yml} (92%) rename test/{config_test/examples/mnist-pytorch.test.yml => config/examples/mnist-pytorch.yml} (91%) rename test/{config_test/examples/mnist-tfv1.test.yml => config/examples/mnist-tfv1.yml} (90%) rename test/{config_test => config}/examples/mnist_pytorch_search_space.json (100%) rename test/{config_test => config}/examples/mnist_search_space.json (100%) rename test/{config_test/examples/sklearn-classification.test.yml => config/examples/sklearn-classification.yml} (100%) rename test/{config_test/examples/sklearn-regression.test.yml => config/examples/sklearn-regression.yml} (100%) create mode 100644 test/config/integration_tests.yml rename test/{ => config}/metrics_test/expected_metrics.json (100%) rename test/{ => config}/metrics_test/metrics.test.yml (100%) rename test/{ => config}/metrics_test/metrics_win32.test.yml (100%) rename test/{config_test/multi_phase => config/metrics_test}/search_space.json (100%) rename test/{ => config}/metrics_test/trial.py (100%) rename test/{config_test/multi_phase/multi_phase_batch.test.yml => config/multi_phase/batch.yml} (100%) rename test/{config_test/multi_phase/multi_phase_evolution.test.yml => config/multi_phase/evolution.yml} (100%) rename test/{config_test/multi_phase/multi_phase_grid.test.yml => config/multi_phase/grid.yml} (100%) rename test/{config_test/multi_phase/multi_phase_metis.test.yml => config/multi_phase/metis.yml} (95%) rename test/{config_test => config}/multi_phase/multi_phase.py (100%) rename 
test/{config_test/multi_thread => config/multi_phase}/search_space.json (100%) rename test/{config_test/multi_phase/multi_phase_tpe.test.yml => config/multi_phase/tpe.yml} (95%) rename test/{config_test/multi_thread/multi_thread.test.yml => config/multi_thread/config.yml} (100%) rename test/{config_test => config}/multi_thread/multi_thread_trial.py (100%) rename test/{config_test => config}/multi_thread/multi_thread_tuner.py (100%) rename test/{metrics_test => config/multi_thread}/search_space.json (100%) rename test/{ => config}/naive_test/README.md (100%) rename test/{ => config}/naive_test/README_zh_CN.md (100%) rename test/{ => config}/naive_test/expected_assessor_result.txt (100%) rename test/{ => config}/naive_test/expected_tuner_result.txt (100%) rename test/{ => config}/naive_test/local.yml (100%) rename test/{ => config}/naive_test/local_win32.yml (100%) rename test/{ => config}/naive_test/naive_assessor.py (100%) rename test/{ => config}/naive_test/naive_trial.py (100%) rename test/{ => config}/naive_test/naive_tuner.py (100%) rename test/{ => config}/naive_test/search_space.json (100%) rename test/{ => config}/training_service.yml (100%) rename test/{ => config}/tuner_test/batchtuner_search_space.json (100%) rename test/{ => config}/tuner_test/local.yml (100%) rename test/{ => config}/tuner_test/local_win32.yml (100%) rename test/{ => config}/tuner_test/naive_trial.py (100%) rename test/{ => config}/tuner_test/search_space.json (100%) rename test/{config_test/tuners/mnist-annotation-anneal.test.yml => config/tuners/mnist-annotation-anneal.yml} (100%) rename test/{config_test/tuners/mnist-annotation-evolution.test.yml => config/tuners/mnist-annotation-evolution.yml} (100%) rename test/{config_test/tuners/mnist-annotation-random.test.yml => config/tuners/mnist-annotation-random.yml} (100%) rename test/{config_test/tuners/mnist-annotation-smac.test.yml => config/tuners/mnist-annotation-smac.yml} (100%) rename 
test/{config_test/tuners/mnist-annotation-tpe.test.yml => config/tuners/mnist-annotation-tpe.yml} (100%) rename test/{config_test/tuners/mnist-batchtuner.test.yml => config/tuners/mnist-batchtuner.yml} (100%) rename test/{config_test/tuners/mnist-bohb.test.yml => config/tuners/mnist-bohb.yml} (100%) rename test/{config_test/tuners/mnist-curvefitting.test.yml => config/tuners/mnist-curvefitting.yml} (100%) rename test/{config_test/tuners/mnist-gp.test.yml => config/tuners/mnist-gp.yml} (100%) rename test/{config_test/tuners/mnist-gridsearch.test.yml => config/tuners/mnist-gridsearch.yml} (100%) rename test/{config_test/tuners/mnist-hyperband.test.yml => config/tuners/mnist-hyperband.yml} (100%) rename test/{config_test/tuners/mnist-metis.test.yml => config/tuners/mnist-metis.yml} (100%) create mode 100644 test/config/tuners/mnist-metis.yml.tmp rename test/{config_test => config}/tuners/search_space.json (100%) rename test/{config_test => config}/tuners/search_space_advisor.json (100%) rename test/{config_test => config}/tuners/search_space_batchtuner.json (100%) delete mode 100644 test/config_test/multi_phase/multi_phase.test.yml create mode 100644 test/nni_test/nnitest/__init__.py rename test/{ => nni_test/nnitest}/cli_test.py (100%) rename test/{ => nni_test/nnitest}/config_test.py (96%) rename test/{ => nni_test/nnitest}/generate_ts_config.py (98%) rename test/{ => nni_test/nnitest}/metrics_test.py (100%) rename test/{ => nni_test/nnitest}/naive_test.py (72%) rename test/{ => nni_test/nnitest}/remote_docker.py (100%) create mode 100644 test/nni_test/nnitest/run_tests.py rename test/{ => nni_test/nnitest}/tuner_test.py (100%) rename test/{ => nni_test/nnitest}/utils.py (83%) create mode 100644 test/nni_test/nnitest/validators.py create mode 100644 test/nni_test/setup.py rename test/{ => pipelines}/pipelines-it-frameworkcontroller.yml (100%) rename test/{ => pipelines}/pipelines-it-installation.yml (100%) rename test/{ => pipelines}/pipelines-it-kubeflow.yml (100%) 
rename test/{ => pipelines}/pipelines-it-local-windows.yml (100%) rename test/{ => pipelines}/pipelines-it-local.yml (75%) rename test/{ => pipelines}/pipelines-it-pai-windows.yml (100%) rename test/{ => pipelines}/pipelines-it-pai.yml (100%) rename test/{ => pipelines}/pipelines-it-paiYarn.yml (100%) rename test/{ => pipelines}/pipelines-it-remote-windows.yml (100%) rename test/{ => pipelines}/pipelines-it-remote.yml (100%) rename test/{ => scripts}/it.sh (100%) rename test/{ => scripts}/unittest.ps1 (100%) rename test/{ => scripts}/unittest.sh (100%) mode change 100755 => 100644 diff --git a/test/config_test/examples/cifar10-pytorch.test.yml b/test/config/examples/cifar10-pytorch.yml similarity index 100% rename from test/config_test/examples/cifar10-pytorch.test.yml rename to test/config/examples/cifar10-pytorch.yml diff --git a/test/config_test/examples/cifar10_search_space.json b/test/config/examples/cifar10_search_space.json similarity index 100% rename from test/config_test/examples/cifar10_search_space.json rename to test/config/examples/cifar10_search_space.json diff --git a/test/config_test/examples/mnist-annotation.test.yml b/test/config/examples/mnist-annotation.yml similarity index 89% rename from test/config_test/examples/mnist-annotation.test.yml rename to test/config/examples/mnist-annotation.yml index 0402fbea8b..17d28684e6 100644 --- a/test/config_test/examples/mnist-annotation.test.yml +++ b/test/config/examples/mnist-annotation.yml @@ -12,7 +12,7 @@ assessor: optimize_mode: maximize trial: codeDir: ../../../examples/trials/mnist-annotation - command: python3 mnist.py --batch_num 100 + command: python3 mnist.py --batch_num 10 gpuNum: 0 useAnnotation: true diff --git a/test/config_test/examples/mnist-keras.test.yml b/test/config/examples/mnist-keras.yml similarity index 100% rename from test/config_test/examples/mnist-keras.test.yml rename to test/config/examples/mnist-keras.yml diff --git 
a/test/config_test/examples/mnist-nested-search-space.test.yml b/test/config/examples/mnist-nested-search-space.yml similarity index 92% rename from test/config_test/examples/mnist-nested-search-space.test.yml rename to test/config/examples/mnist-nested-search-space.yml index accbd8b88f..9be7c8ef7e 100644 --- a/test/config_test/examples/mnist-nested-search-space.test.yml +++ b/test/config/examples/mnist-nested-search-space.yml @@ -14,7 +14,7 @@ assessor: optimize_mode: maximize trial: codeDir: ../../../examples/trials/mnist-nested-search-space - command: python3 mnist.py --batch_num 100 + command: python3 mnist.py --batch_num 10 gpuNum: 0 useAnnotation: false diff --git a/test/config_test/examples/mnist-pytorch.test.yml b/test/config/examples/mnist-pytorch.yml similarity index 91% rename from test/config_test/examples/mnist-pytorch.test.yml rename to test/config/examples/mnist-pytorch.yml index a5f375b3b7..6aab3fc80f 100644 --- a/test/config_test/examples/mnist-pytorch.test.yml +++ b/test/config/examples/mnist-pytorch.yml @@ -13,7 +13,7 @@ assessor: optimize_mode: maximize trial: codeDir: ../../../examples/trials/mnist-pytorch - command: python3 mnist.py --epochs 2 + command: python3 mnist.py --epochs 1 gpuNum: 0 useAnnotation: false diff --git a/test/config_test/examples/mnist-tfv1.test.yml b/test/config/examples/mnist-tfv1.yml similarity index 90% rename from test/config_test/examples/mnist-tfv1.test.yml rename to test/config/examples/mnist-tfv1.yml index 13ecf3f9b9..f66e288efc 100644 --- a/test/config_test/examples/mnist-tfv1.test.yml +++ b/test/config/examples/mnist-tfv1.yml @@ -13,7 +13,7 @@ assessor: optimize_mode: maximize trial: codeDir: ../../../examples/trials/mnist-tfv1 - command: python3 mnist.py --batch_num 100 + command: python3 mnist.py --batch_num 10 gpuNum: 0 useAnnotation: false diff --git a/test/config_test/examples/mnist_pytorch_search_space.json b/test/config/examples/mnist_pytorch_search_space.json similarity index 100% rename from 
test/config_test/examples/mnist_pytorch_search_space.json rename to test/config/examples/mnist_pytorch_search_space.json diff --git a/test/config_test/examples/mnist_search_space.json b/test/config/examples/mnist_search_space.json similarity index 100% rename from test/config_test/examples/mnist_search_space.json rename to test/config/examples/mnist_search_space.json diff --git a/test/config_test/examples/sklearn-classification.test.yml b/test/config/examples/sklearn-classification.yml similarity index 100% rename from test/config_test/examples/sklearn-classification.test.yml rename to test/config/examples/sklearn-classification.yml diff --git a/test/config_test/examples/sklearn-regression.test.yml b/test/config/examples/sklearn-regression.yml similarity index 100% rename from test/config_test/examples/sklearn-regression.test.yml rename to test/config/examples/sklearn-regression.yml diff --git a/test/config/integration_tests.yml b/test/config/integration_tests.yml new file mode 100644 index 0000000000..bcb6382aa3 --- /dev/null +++ b/test/config/integration_tests.yml @@ -0,0 +1,144 @@ + +defaultTestCaseConfig: + launchCommand: nnictl create --config $configFile + stopCommand: nnictl stop + +testCases: +####################################################################### +# nni examples test +####################################################################### +- name: sklearn-classification + # test case config yml file relative to nni source code directory + configFile: test/config/examples/sklearn-classification.yml + + # test case specific config, the content of configFile will be overridden + # by config section + config: + + # validator is called after experiment is done + # validator class needs to be implemented in nni_test/nnitest/validators.py + validator: + + # launch command, default launch command is 'nnictl create --config $configFile' + launchCommand: nnictl create --config $configFile + + # stop command, default stop command is 'nnictl stop', 
empty means no stop command + stopCommand: nnictl stop + + # set experiment ID into variable, variable name should start with $, such as $expId + setExperimentIdtoVar: $expId + +- name: sklearn-regression + configFile: test/config/examples/sklearn-regression.yml + +- name: mnist-tfv1 + configFile: test/config/examples/mnist-tfv1.yml + +- name: mnist-keras + configFile: test/config/examples/mnist-keras.yml + +- name: mnist-pytorch + configFile: test/config/examples/mnist-pytorch.yml + +- name: mnist-annotation + configFile: test/config/examples/mnist-annotation.yml + +- name: cifar10_pytorch + configFile: test/config/examples/cifar10-pytorch.yml + config: + # this example downloads large pretrained model weights + # test 1 trial to save time + maxExecDuration: 10m + maxTrialNum: 1 + trialConcurrency: 1 + trial: + command: python3 main.py --epochs 1 --batches 1 + gpuNum: 0 + +- name: nested-ss + configFile: test/config/examples/mnist-nested-search-space.yml + + +######################################################################### +# nni features test +######################################################################### +- name: metrics_test + configFile: test/config/metrics_test/metrics.test.yml + config: + maxTrialNum: 1 + trialConcurrency: 1 + validator: MetricsValidator + +# Experiment resume test part 1 +- name: resume-1 + configFile: test/config/examples/sklearn-regression.yml + setExperimentIdtoVar: $resumeExpId + +# Experiment resume test part 2 +- name: resume-2 + configFile: test/config/examples/sklearn-regression.yml + launchCommand: nnictl resume $resumeExpId + +- name: multi-thread + configFile: test/config/multi_thread/config.yml + +- name: multi-phase-batch + configFile: test/config/multi_phase/batch.yml + config: + # for batch tuner, maxTrialNum can not exceed length of search space + maxTrialNum: 2 + trialConcurrency: 2 + +- name: multi-phase-evolution + configFile: test/config/multi_phase/evolution.yml + +- name: multi-phase-grid + 
configFile: test/config/multi_phase/grid.yml + config: + maxTrialNum: 2 + trialConcurrency: 2 + +- name: multi-phase-metis + configFile: test/config/multi_phase/metis.yml + +- name: multi-phase-tpe + configFile: test/config/multi_phase/tpe.yml + +######################################################################### +# nni tuners test +######################################################################### +- name: tuner-annel + configFile: test/config/tuners/mnist-annotation-anneal.yml + +- name: tuner-evolution + configFile: test/config/tuners/mnist-annotation-evolution.yml + +- name: tuner-random + configFile: test/config/tuners/mnist-annotation-random.yml + +- name: tuner-smac + configFile: test/config/tuners/mnist-annotation-smac.yml + +- name: tuner-tpe + configFile: test/config/tuners/mnist-annotation-tpe.yml + +- name: tuner-batch + configFile: test/config/tuners/mnist-batchtuner.yml + +- name: tuner-bohb + configFile: test/config/tuners/mnist-bohb.yml + +- name: tuner-curvefitting + configFile: test/config/tuners/mnist-curvefitting.yml + +- name: tuner-gp + configFile: test/config/tuners/mnist-gp.yml + +- name: tuner-grid + configFile: test/config/tuners/mnist-gridsearch.yml + +- name: tuner-hyperband + configFile: test/config/tuners/mnist-hyperband.yml + +- name: tuner-metis + configFile: test/config/tuners/mnist-metis.yml diff --git a/test/metrics_test/expected_metrics.json b/test/config/metrics_test/expected_metrics.json similarity index 100% rename from test/metrics_test/expected_metrics.json rename to test/config/metrics_test/expected_metrics.json diff --git a/test/metrics_test/metrics.test.yml b/test/config/metrics_test/metrics.test.yml similarity index 100% rename from test/metrics_test/metrics.test.yml rename to test/config/metrics_test/metrics.test.yml diff --git a/test/metrics_test/metrics_win32.test.yml b/test/config/metrics_test/metrics_win32.test.yml similarity index 100% rename from test/metrics_test/metrics_win32.test.yml rename to 
test/config/metrics_test/metrics_win32.test.yml diff --git a/test/config_test/multi_phase/search_space.json b/test/config/metrics_test/search_space.json similarity index 100% rename from test/config_test/multi_phase/search_space.json rename to test/config/metrics_test/search_space.json diff --git a/test/metrics_test/trial.py b/test/config/metrics_test/trial.py similarity index 100% rename from test/metrics_test/trial.py rename to test/config/metrics_test/trial.py diff --git a/test/config_test/multi_phase/multi_phase_batch.test.yml b/test/config/multi_phase/batch.yml similarity index 100% rename from test/config_test/multi_phase/multi_phase_batch.test.yml rename to test/config/multi_phase/batch.yml diff --git a/test/config_test/multi_phase/multi_phase_evolution.test.yml b/test/config/multi_phase/evolution.yml similarity index 100% rename from test/config_test/multi_phase/multi_phase_evolution.test.yml rename to test/config/multi_phase/evolution.yml diff --git a/test/config_test/multi_phase/multi_phase_grid.test.yml b/test/config/multi_phase/grid.yml similarity index 100% rename from test/config_test/multi_phase/multi_phase_grid.test.yml rename to test/config/multi_phase/grid.yml diff --git a/test/config_test/multi_phase/multi_phase_metis.test.yml b/test/config/multi_phase/metis.yml similarity index 95% rename from test/config_test/multi_phase/multi_phase_metis.test.yml rename to test/config/multi_phase/metis.yml index 16b0c8a07a..3198b480ed 100644 --- a/test/config_test/multi_phase/multi_phase_metis.test.yml +++ b/test/config/multi_phase/metis.yml @@ -1,7 +1,7 @@ authorName: nni experimentName: default_test maxExecDuration: 5m -maxTrialNum: 8 +maxTrialNum: 4 trialConcurrency: 4 searchSpacePath: ./search_space.json diff --git a/test/config_test/multi_phase/multi_phase.py b/test/config/multi_phase/multi_phase.py similarity index 100% rename from test/config_test/multi_phase/multi_phase.py rename to test/config/multi_phase/multi_phase.py diff --git 
a/test/config_test/multi_thread/search_space.json b/test/config/multi_phase/search_space.json similarity index 100% rename from test/config_test/multi_thread/search_space.json rename to test/config/multi_phase/search_space.json diff --git a/test/config_test/multi_phase/multi_phase_tpe.test.yml b/test/config/multi_phase/tpe.yml similarity index 95% rename from test/config_test/multi_phase/multi_phase_tpe.test.yml rename to test/config/multi_phase/tpe.yml index af21870320..2e259eeafe 100644 --- a/test/config_test/multi_phase/multi_phase_tpe.test.yml +++ b/test/config/multi_phase/tpe.yml @@ -1,7 +1,7 @@ authorName: nni experimentName: default_test maxExecDuration: 5m -maxTrialNum: 8 +maxTrialNum: 4 trialConcurrency: 4 searchSpacePath: ./search_space.json diff --git a/test/config_test/multi_thread/multi_thread.test.yml b/test/config/multi_thread/config.yml similarity index 100% rename from test/config_test/multi_thread/multi_thread.test.yml rename to test/config/multi_thread/config.yml diff --git a/test/config_test/multi_thread/multi_thread_trial.py b/test/config/multi_thread/multi_thread_trial.py similarity index 100% rename from test/config_test/multi_thread/multi_thread_trial.py rename to test/config/multi_thread/multi_thread_trial.py diff --git a/test/config_test/multi_thread/multi_thread_tuner.py b/test/config/multi_thread/multi_thread_tuner.py similarity index 100% rename from test/config_test/multi_thread/multi_thread_tuner.py rename to test/config/multi_thread/multi_thread_tuner.py diff --git a/test/metrics_test/search_space.json b/test/config/multi_thread/search_space.json similarity index 100% rename from test/metrics_test/search_space.json rename to test/config/multi_thread/search_space.json diff --git a/test/naive_test/README.md b/test/config/naive_test/README.md similarity index 100% rename from test/naive_test/README.md rename to test/config/naive_test/README.md diff --git a/test/naive_test/README_zh_CN.md b/test/config/naive_test/README_zh_CN.md 
similarity index 100% rename from test/naive_test/README_zh_CN.md rename to test/config/naive_test/README_zh_CN.md diff --git a/test/naive_test/expected_assessor_result.txt b/test/config/naive_test/expected_assessor_result.txt similarity index 100% rename from test/naive_test/expected_assessor_result.txt rename to test/config/naive_test/expected_assessor_result.txt diff --git a/test/naive_test/expected_tuner_result.txt b/test/config/naive_test/expected_tuner_result.txt similarity index 100% rename from test/naive_test/expected_tuner_result.txt rename to test/config/naive_test/expected_tuner_result.txt diff --git a/test/naive_test/local.yml b/test/config/naive_test/local.yml similarity index 100% rename from test/naive_test/local.yml rename to test/config/naive_test/local.yml diff --git a/test/naive_test/local_win32.yml b/test/config/naive_test/local_win32.yml similarity index 100% rename from test/naive_test/local_win32.yml rename to test/config/naive_test/local_win32.yml diff --git a/test/naive_test/naive_assessor.py b/test/config/naive_test/naive_assessor.py similarity index 100% rename from test/naive_test/naive_assessor.py rename to test/config/naive_test/naive_assessor.py diff --git a/test/naive_test/naive_trial.py b/test/config/naive_test/naive_trial.py similarity index 100% rename from test/naive_test/naive_trial.py rename to test/config/naive_test/naive_trial.py diff --git a/test/naive_test/naive_tuner.py b/test/config/naive_test/naive_tuner.py similarity index 100% rename from test/naive_test/naive_tuner.py rename to test/config/naive_test/naive_tuner.py diff --git a/test/naive_test/search_space.json b/test/config/naive_test/search_space.json similarity index 100% rename from test/naive_test/search_space.json rename to test/config/naive_test/search_space.json diff --git a/test/training_service.yml b/test/config/training_service.yml similarity index 100% rename from test/training_service.yml rename to test/config/training_service.yml diff --git 
a/test/tuner_test/batchtuner_search_space.json b/test/config/tuner_test/batchtuner_search_space.json similarity index 100% rename from test/tuner_test/batchtuner_search_space.json rename to test/config/tuner_test/batchtuner_search_space.json diff --git a/test/tuner_test/local.yml b/test/config/tuner_test/local.yml similarity index 100% rename from test/tuner_test/local.yml rename to test/config/tuner_test/local.yml diff --git a/test/tuner_test/local_win32.yml b/test/config/tuner_test/local_win32.yml similarity index 100% rename from test/tuner_test/local_win32.yml rename to test/config/tuner_test/local_win32.yml diff --git a/test/tuner_test/naive_trial.py b/test/config/tuner_test/naive_trial.py similarity index 100% rename from test/tuner_test/naive_trial.py rename to test/config/tuner_test/naive_trial.py diff --git a/test/tuner_test/search_space.json b/test/config/tuner_test/search_space.json similarity index 100% rename from test/tuner_test/search_space.json rename to test/config/tuner_test/search_space.json diff --git a/test/config_test/tuners/mnist-annotation-anneal.test.yml b/test/config/tuners/mnist-annotation-anneal.yml similarity index 100% rename from test/config_test/tuners/mnist-annotation-anneal.test.yml rename to test/config/tuners/mnist-annotation-anneal.yml diff --git a/test/config_test/tuners/mnist-annotation-evolution.test.yml b/test/config/tuners/mnist-annotation-evolution.yml similarity index 100% rename from test/config_test/tuners/mnist-annotation-evolution.test.yml rename to test/config/tuners/mnist-annotation-evolution.yml diff --git a/test/config_test/tuners/mnist-annotation-random.test.yml b/test/config/tuners/mnist-annotation-random.yml similarity index 100% rename from test/config_test/tuners/mnist-annotation-random.test.yml rename to test/config/tuners/mnist-annotation-random.yml diff --git a/test/config_test/tuners/mnist-annotation-smac.test.yml b/test/config/tuners/mnist-annotation-smac.yml similarity index 100% rename from 
test/config_test/tuners/mnist-annotation-smac.test.yml rename to test/config/tuners/mnist-annotation-smac.yml diff --git a/test/config_test/tuners/mnist-annotation-tpe.test.yml b/test/config/tuners/mnist-annotation-tpe.yml similarity index 100% rename from test/config_test/tuners/mnist-annotation-tpe.test.yml rename to test/config/tuners/mnist-annotation-tpe.yml diff --git a/test/config_test/tuners/mnist-batchtuner.test.yml b/test/config/tuners/mnist-batchtuner.yml similarity index 100% rename from test/config_test/tuners/mnist-batchtuner.test.yml rename to test/config/tuners/mnist-batchtuner.yml diff --git a/test/config_test/tuners/mnist-bohb.test.yml b/test/config/tuners/mnist-bohb.yml similarity index 100% rename from test/config_test/tuners/mnist-bohb.test.yml rename to test/config/tuners/mnist-bohb.yml diff --git a/test/config_test/tuners/mnist-curvefitting.test.yml b/test/config/tuners/mnist-curvefitting.yml similarity index 100% rename from test/config_test/tuners/mnist-curvefitting.test.yml rename to test/config/tuners/mnist-curvefitting.yml diff --git a/test/config_test/tuners/mnist-gp.test.yml b/test/config/tuners/mnist-gp.yml similarity index 100% rename from test/config_test/tuners/mnist-gp.test.yml rename to test/config/tuners/mnist-gp.yml diff --git a/test/config_test/tuners/mnist-gridsearch.test.yml b/test/config/tuners/mnist-gridsearch.yml similarity index 100% rename from test/config_test/tuners/mnist-gridsearch.test.yml rename to test/config/tuners/mnist-gridsearch.yml diff --git a/test/config_test/tuners/mnist-hyperband.test.yml b/test/config/tuners/mnist-hyperband.yml similarity index 100% rename from test/config_test/tuners/mnist-hyperband.test.yml rename to test/config/tuners/mnist-hyperband.yml diff --git a/test/config_test/tuners/mnist-metis.test.yml b/test/config/tuners/mnist-metis.yml similarity index 100% rename from test/config_test/tuners/mnist-metis.test.yml rename to test/config/tuners/mnist-metis.yml diff --git 
a/test/config/tuners/mnist-metis.yml.tmp b/test/config/tuners/mnist-metis.yml.tmp new file mode 100644 index 0000000000..91315d4032 --- /dev/null +++ b/test/config/tuners/mnist-metis.yml.tmp @@ -0,0 +1,23 @@ +assessor: + builtinAssessorName: Medianstop + classArgs: + optimize_mode: maximize +authorName: nni +experimentName: default_test +logCollection: http +maxExecDuration: 5m +maxTrialNum: 2 +multiPhase: false +multiThread: false +searchSpacePath: search_space.json +trainingServicePlatform: local +trial: + codeDir: ../../../examples/trials/mnist-tfv1 + command: python3 mnist.py --batch_num 100 + gpuNum: 0 +trialConcurrency: 1 +tuner: + builtinTunerName: MetisTuner + classArgs: + optimize_mode: maximize +useAnnotation: false diff --git a/test/config_test/tuners/search_space.json b/test/config/tuners/search_space.json similarity index 100% rename from test/config_test/tuners/search_space.json rename to test/config/tuners/search_space.json diff --git a/test/config_test/tuners/search_space_advisor.json b/test/config/tuners/search_space_advisor.json similarity index 100% rename from test/config_test/tuners/search_space_advisor.json rename to test/config/tuners/search_space_advisor.json diff --git a/test/config_test/tuners/search_space_batchtuner.json b/test/config/tuners/search_space_batchtuner.json similarity index 100% rename from test/config_test/tuners/search_space_batchtuner.json rename to test/config/tuners/search_space_batchtuner.json diff --git a/test/config_test/multi_phase/multi_phase.test.yml b/test/config_test/multi_phase/multi_phase.test.yml deleted file mode 100644 index af21870320..0000000000 --- a/test/config_test/multi_phase/multi_phase.test.yml +++ /dev/null @@ -1,22 +0,0 @@ -authorName: nni -experimentName: default_test -maxExecDuration: 5m -maxTrialNum: 8 -trialConcurrency: 4 -searchSpacePath: ./search_space.json - -tuner: - builtinTunerName: TPE - classArgs: - optimize_mode: maximize - -trial: - codeDir: . 
- command: python3 multi_phase.py - gpuNum: 0 - -useAnnotation: false -multiPhase: true -multiThread: false - -trainingServicePlatform: local diff --git a/test/nni_test/nnitest/__init__.py b/test/nni_test/nnitest/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/cli_test.py b/test/nni_test/nnitest/cli_test.py similarity index 100% rename from test/cli_test.py rename to test/nni_test/nnitest/cli_test.py diff --git a/test/config_test.py b/test/nni_test/nnitest/config_test.py similarity index 96% rename from test/config_test.py rename to test/nni_test/nnitest/config_test.py index bce0778d1a..c946ad6c46 100644 --- a/test/config_test.py +++ b/test/nni_test/nnitest/config_test.py @@ -81,7 +81,8 @@ def get_max_values(config_file): def run(args): '''test all configuration files''' if args.config is None: - config_files = glob.glob('./config_test/**/*.test.yml') + assert args.config_dir is not None + config_files = glob.glob('{}/**/*.test.yml'.format(args.config_dir)) else: config_files = args.config.split(',') @@ -111,6 +112,7 @@ def run(args): if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument("--config", type=str, default=None) + parser.add_argument("--config_dir", type=str, default=None) parser.add_argument("--exclude", type=str, default=None) parser.add_argument("--ts", type=str, choices=['local', 'remote', 'pai', 'paiYarn', 'kubeflow', 'frameworkcontroller'], default='local') parser.add_argument("--local_gpu", action='store_true') diff --git a/test/generate_ts_config.py b/test/nni_test/nnitest/generate_ts_config.py similarity index 98% rename from test/generate_ts_config.py rename to test/nni_test/nnitest/generate_ts_config.py index dbebdc45b6..a2d19e3aa7 100644 --- a/test/generate_ts_config.py +++ b/test/nni_test/nnitest/generate_ts_config.py @@ -2,11 +2,12 @@ # Licensed under the MIT license. 
import sys +import os import glob import argparse from utils import get_yml_content, dump_yml_content -TRAINING_SERVICE_FILE = 'training_service.yml' +TRAINING_SERVICE_FILE = os.path.join('config', 'training_service.yml') def update_training_service_config(args): config = get_yml_content(TRAINING_SERVICE_FILE) diff --git a/test/metrics_test.py b/test/nni_test/nnitest/metrics_test.py similarity index 100% rename from test/metrics_test.py rename to test/nni_test/nnitest/metrics_test.py diff --git a/test/naive_test.py b/test/nni_test/nnitest/naive_test.py similarity index 72% rename from test/naive_test.py rename to test/nni_test/nnitest/naive_test.py index 590cd44627..f70bc9a250 100644 --- a/test/naive_test.py +++ b/test/nni_test/nnitest/naive_test.py @@ -12,17 +12,19 @@ from utils import is_experiment_done, get_experiment_id, get_nni_log_path, read_last_line, remove_files, setup_experiment, detect_port, snooze from utils import GREEN, RED, CLEAR, EXPERIMENT_URL +NAIVE_TEST_CONFIG_DIR = 'config/naive_test' + def naive_test(): '''run naive integration test''' to_remove = ['tuner_search_space.json', 'tuner_result.txt', 'assessor_result.txt'] - to_remove = list(map(lambda file: osp.join('naive_test', file), to_remove)) + to_remove = list(map(lambda file: osp.join(NAIVE_TEST_CONFIG_DIR, file), to_remove)) remove_files(to_remove) if sys.platform == 'win32': config_file = 'local_win32.yml' else: config_file = 'local.yml' - proc = subprocess.run(['nnictl', 'create', '--config', osp.join('naive_test' , config_file)]) + proc = subprocess.run(['nnictl', 'create', '--config', osp.join(NAIVE_TEST_CONFIG_DIR, config_file)]) assert proc.returncode == 0, '`nnictl create` failed with code %d' % proc.returncode print('Spawning trials...') @@ -33,8 +35,8 @@ def naive_test(): for _ in range(120): time.sleep(1) - tuner_status = read_last_line(osp.join('naive_test', 'tuner_result.txt')) - assessor_status = read_last_line(osp.join('naive_test', 'assessor_result.txt')) + tuner_status = 
read_last_line(osp.join(NAIVE_TEST_CONFIG_DIR, 'tuner_result.txt')) + assessor_status = read_last_line(osp.join(NAIVE_TEST_CONFIG_DIR, 'assessor_result.txt')) experiment_status = is_experiment_done(nnimanager_log_path) assert tuner_status != 'ERROR', 'Tuner exited with error' @@ -44,7 +46,7 @@ def naive_test(): break if tuner_status is not None: - for line in open(osp.join('naive_test', 'tuner_result.txt')): + for line in open(osp.join(NAIVE_TEST_CONFIG_DIR, 'tuner_result.txt')): if line.strip() == 'ERROR': break trial = int(line.split(' ')[0]) @@ -54,32 +56,33 @@ def naive_test(): assert experiment_status, 'Failed to finish in 2 min' - ss1 = json.load(open(osp.join('naive_test', 'search_space.json'))) - ss2 = json.load(open(osp.join('naive_test', 'tuner_search_space.json'))) + ss1 = json.load(open(osp.join(NAIVE_TEST_CONFIG_DIR, 'search_space.json'))) + ss2 = json.load(open(osp.join(NAIVE_TEST_CONFIG_DIR, 'tuner_search_space.json'))) assert ss1 == ss2, 'Tuner got wrong search space' - tuner_result = set(open(osp.join('naive_test', 'tuner_result.txt'))) - expected = set(open(osp.join('naive_test', 'expected_tuner_result.txt'))) + tuner_result = set(open(osp.join(NAIVE_TEST_CONFIG_DIR, 'tuner_result.txt'))) + expected = set(open(osp.join(NAIVE_TEST_CONFIG_DIR, 'expected_tuner_result.txt'))) # Trials may complete before NNI gets assessor's result, # so it is possible to have more final result than expected print('Tuner result:', tuner_result) print('Expected tuner result:', expected) assert tuner_result.issuperset(expected), 'Bad tuner result' - assessor_result = set(open(osp.join('naive_test', 'assessor_result.txt'))) - expected = set(open(osp.join('naive_test', 'expected_assessor_result.txt'))) + assessor_result = set(open(osp.join(NAIVE_TEST_CONFIG_DIR, 'assessor_result.txt'))) + expected = set(open(osp.join(NAIVE_TEST_CONFIG_DIR, 'expected_assessor_result.txt'))) assert assessor_result == expected, 'Bad assessor result' subprocess.run(['nnictl', 'stop']) snooze() 
def stop_experiment_test(): + config_file = osp.join(NAIVE_TEST_CONFIG_DIR, 'local.yml') '''Test `nnictl stop` command, including `nnictl stop exp_id` and `nnictl stop all`. Simple `nnictl stop` is not tested here since it is used in all other test code''' - subprocess.run(['nnictl', 'create', '--config', osp.join('tuner_test', 'local.yml'), '--port', '8080'], check=True) - subprocess.run(['nnictl', 'create', '--config', osp.join('tuner_test', 'local.yml'), '--port', '8888'], check=True) - subprocess.run(['nnictl', 'create', '--config', osp.join('tuner_test', 'local.yml'), '--port', '8989'], check=True) - subprocess.run(['nnictl', 'create', '--config', osp.join('tuner_test', 'local.yml'), '--port', '8990'], check=True) + subprocess.run(['nnictl', 'create', '--config', config_file, '--port', '8080'], check=True) + subprocess.run(['nnictl', 'create', '--config', config_file, '--port', '8888'], check=True) + subprocess.run(['nnictl', 'create', '--config', config_file, '--port', '8989'], check=True) + subprocess.run(['nnictl', 'create', '--config', config_file, '--port', '8990'], check=True) # test cmd 'nnictl stop id` experiment_id = get_experiment_id(EXPERIMENT_URL) diff --git a/test/remote_docker.py b/test/nni_test/nnitest/remote_docker.py similarity index 100% rename from test/remote_docker.py rename to test/nni_test/nnitest/remote_docker.py diff --git a/test/nni_test/nnitest/run_tests.py b/test/nni_test/nnitest/run_tests.py new file mode 100644 index 0000000000..74faf6a6b4 --- /dev/null +++ b/test/nni_test/nnitest/run_tests.py @@ -0,0 +1,161 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. 
+ +import os +import argparse +import subprocess +import time +import traceback +import json +import torch +import ruamel.yaml as yaml + +from utils import setup_experiment, get_experiment_status, get_yml_content, dump_yml_content, get_experiment_id, \ + parse_max_duration_time, get_succeeded_trial_num, deep_update, print_trial_job_log, get_failed_trial_jobs +from utils import GREEN, RED, CLEAR, STATUS_URL, TRIAL_JOBS_URL, EXPERIMENT_URL, REST_ENDPOINT +import validators + +it_variables = {} + +def update_training_service_config(config, training_service): + it_ts_config = get_yml_content(os.path.join('config', 'training_service.yml')) + + # hack for kubeflow trial config + if training_service == 'kubeflow': + it_ts_config[training_service]['trial']['worker']['command'] = config['trial']['command'] + config['trial'].pop('command') + if 'gpuNum' in config['trial']: + config['trial'].pop('gpuNum') + + if training_service == 'frameworkcontroller': + it_ts_config[training_service]['trial']['taskRoles'][0]['command'] = config['trial']['command'] + config['trial'].pop('command') + if 'gpuNum' in config['trial']: + config['trial'].pop('gpuNum') + + deep_update(config, it_ts_config['all']) + deep_update(config, it_ts_config[training_service]) + +def run_test_case(test_case_config, it_config, args): + # fill test case default config + for k in it_config['defaultTestCaseConfig']: + if k not in test_case_config: + test_case_config[k] = it_config['defaultTestCaseConfig'][k] + print(json.dumps(test_case_config, indent=4)) + + config_path = os.path.join(args.nni_source_dir, test_case_config['configFile']) + test_yml_config = get_yml_content(config_path) + + # apply training service config + update_training_service_config(test_yml_config, args.ts) + + # apply test case specific config + if test_case_config.get('config') is not None: + deep_update(test_yml_config, test_case_config['config']) + + # check GPU + if test_yml_config['trial']['gpuNum'] > 0 and torch.cuda.device_count() < 
1: + print('skipping {}, gpu is not available'.format(test_case_config['name'])) + return + + # generate temporary config yml file to launch experiment + new_config_file = config_path + '.tmp' + dump_yml_content(new_config_file, test_yml_config) + print(yaml.dump(test_yml_config, default_flow_style=False)) + + # set configFile variable + it_variables['$configFile'] = new_config_file + + try: + launch_test(new_config_file, args.ts, test_case_config) + + validator_name = test_case_config.get('validator') + if validator_name is not None: + validator = validators.__dict__[validator_name]() + validator(REST_ENDPOINT, None, args.nni_source_dir) + finally: + print('Stop command:', test_case_config.get('stopCommand')) + if test_case_config.get('stopCommand'): + subprocess.run(test_case_config.get('stopCommand').split(' ')) + # remove tmp config file + if os.path.exists(new_config_file): + os.remove(new_config_file) + +def get_max_values(config_file): + '''Get maxExecDuration and maxTrialNum of experiment''' + experiment_config = get_yml_content(config_file) + return parse_max_duration_time(experiment_config['maxExecDuration']), experiment_config['maxTrialNum'] + +def get_launch_command(test_case_config): + launch_command = test_case_config.get('launchCommand') + assert launch_command is not None + + # replace variables + for k in it_variables: + launch_command = launch_command.replace(k, it_variables[k]) + print('launch command: ', launch_command) + return launch_command + +def launch_test(config_file, training_service, test_case_config): + '''run test per configuration file''' + + proc = subprocess.run(get_launch_command(test_case_config).split(' ')) + assert proc.returncode == 0, '`nnictl create` failed with code %d' % proc.returncode + + # set experiment ID into variable + exp_var_name = test_case_config.get('setExperimentIdtoVar') + if exp_var_name is not None: + assert exp_var_name.startswith('$') + it_variables[exp_var_name] = get_experiment_id(EXPERIMENT_URL) + 
print('variables:', it_variables) + + max_duration, max_trial_num = get_max_values(config_file) + sleep_interval = 3 + + for _ in range(0, max_duration+10, sleep_interval): + time.sleep(sleep_interval) + status = get_experiment_status(STATUS_URL) + if status in ['DONE', 'ERROR'] or get_failed_trial_jobs(TRIAL_JOBS_URL): + break + + if status != 'DONE' or get_succeeded_trial_num(TRIAL_JOBS_URL) < max_trial_num: + print_trial_job_log(training_service, TRIAL_JOBS_URL) + raise AssertionError('Failed to finish in maxExecDuration') + +def case_excluded(name, excludes): + if name is None: + return False + if excludes is not None: + excludes = excludes.split(',') + for e in excludes: + if name in e or e in name: + return True + return False + +def run(args): + it_config = get_yml_content(args.config) + + for test_case_config in it_config['testCases']: + name = test_case_config['name'] + if case_excluded(name, args.exclude): + print('{} excluded'.format(name)) + continue + if args.case and name and args.case not in name: + continue + print('{}Testing: {}{}'.format(GREEN, name, CLEAR)) + time.sleep(5) + begin_time = time.time() + + run_test_case(test_case_config, it_config, args) + print(GREEN + 'Test %s: TEST PASS IN %d mins' % (name, (time.time() - begin_time)/60) + CLEAR) + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument("--config", type=str, required=True) + parser.add_argument("--nni_source_dir", type=str, default='../') + parser.add_argument("--case", type=str, default=None) + parser.add_argument("--exclude", type=str, default=None) + parser.add_argument("--ts", type=str, choices=['local', 'remote', 'pai', 'kubeflow', 'frameworkcontroller'], default='local') + args = parser.parse_args() + + run(args) diff --git a/test/tuner_test.py b/test/nni_test/nnitest/tuner_test.py similarity index 100% rename from test/tuner_test.py rename to test/nni_test/nnitest/tuner_test.py diff --git a/test/utils.py b/test/nni_test/nnitest/utils.py 
similarity index 83% rename from test/utils.py rename to test/nni_test/nnitest/utils.py index 83dcff8499..9daa1952fb 100644 --- a/test/utils.py +++ b/test/nni_test/nnitest/utils.py @@ -99,14 +99,35 @@ def get_succeeded_trial_num(trial_jobs_url): print('num_succeed:', num_succeed) return num_succeed -def get_failed_trial_jobs(trial_jobs_url): +def get_trial_jobs(trial_jobs_url, status=None): '''Return failed trial jobs''' trial_jobs = requests.get(trial_jobs_url).json() - failed_jobs = [] + res = [] for trial_job in trial_jobs: - if trial_job['status'] in ['FAILED']: - failed_jobs.append(trial_job) - return failed_jobs + if status is None or trial_job['status'] == status: + res.append(trial_job) + return res + +def get_failed_trial_jobs(trial_jobs_url): + '''Return failed trial jobs''' + return get_trial_jobs(trial_jobs_url, 'FAILED') + +def print_trial_job_log(training_service, trial_jobs_url): + '''Print job log of FAILED trial jobs''' + trial_jobs = get_trial_jobs(trial_jobs_url) + for trial_job in trial_jobs: + log_files = [] + trial_log_dir = os.path.join(get_experiment_dir(EXPERIMENT_URL), 'trials', trial_job['id']) + if training_service == 'local': + log_files.append(os.path.join(trial_log_dir, 'stderr')) + log_files.append(os.path.join(trial_log_dir, 'trial.log')) + else: + log_files.append(os.path.join(trial_log_dir, 'stdout_log_collection.log')) + for log_file in log_files: + with open(log_file, 'r') as f: + log_content = f.read() + print(log_file, flush=True) + print(log_content, flush=True) def print_failed_job_log(training_service, trial_jobs_url): '''Print job log of FAILED trial jobs''' diff --git a/test/nni_test/nnitest/validators.py b/test/nni_test/nnitest/validators.py new file mode 100644 index 0000000000..efeaf2446c --- /dev/null +++ b/test/nni_test/nnitest/validators.py @@ -0,0 +1,52 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. 
+ +import os.path as osp +import json +import requests +from utils import GREEN, RED, CLEAR, STATUS_URL, TRIAL_JOBS_URL, METRICS_URL + + +class ITValidator: + def __call__(self, api_root_url, experiment_dir, nni_source_dir): + pass + + +class MetricsValidator(ITValidator): + def __call__(self, api_root_url, experiment_dir, nni_source_dir): + #print('VALIDATOR CALLED!!!') + self.check_metrics(nni_source_dir) + + def check_metrics(self, nni_source_dir): + with open(osp.join(nni_source_dir, 'test', 'config', 'metrics_test', 'expected_metrics.json'), 'r') as f: + expected_metrics = json.load(f) + print('expected metrics:', expected_metrics) + metrics = requests.get(METRICS_URL).json() + intermediate_result, final_result = self.get_metric_results(metrics) + + assert intermediate_result and final_result + for trialjob_id in intermediate_result: + trial_final_result = final_result[trialjob_id] + trial_intermediate_result = intermediate_result[trialjob_id] + print('intermediate result:', trial_intermediate_result) + print('final result:', trial_final_result) + assert len(trial_final_result) == 1, 'there should be 1 final result' + assert trial_final_result[0] == expected_metrics['final_result'] + assert set(trial_intermediate_result) == set(expected_metrics['intermediate_result']) + + def get_metric_results(self, metrics): + intermediate_result = {} + final_result = {} + for metric in metrics: + metric_value = round(float(json.loads(metric['data'])), 2) + if metric['type'] == 'PERIODICAL': + if metric['trialJobId'] in intermediate_result: + intermediate_result[metric['trialJobId']].append(metric_value) + else: + intermediate_result[metric['trialJobId']] = [metric_value] + elif metric['type'] == 'FINAL': + if metric['trialJobId'] in final_result: + final_result[metric['trialJobId']].append(metric_value) + else: + final_result[metric['trialJobId']] = [metric_value] + return intermediate_result, final_result diff --git a/test/nni_test/setup.py b/test/nni_test/setup.py new 
file mode 100644 index 0000000000..e2f12606ff --- /dev/null +++ b/test/nni_test/setup.py @@ -0,0 +1,18 @@ +from setuptools import setup, find_packages + +setup( + name="nnitest", + version="0.0.1", + author = 'Microsoft NNI team', + author_email = 'nni@microsoft.com', + description = 'Neural Network Intelligence package', + license = 'MIT', + url = 'https://github.com/Microsoft/nni', + packages=find_packages('nnitest'), + long_description="", + classifiers = [ + 'Programming Language :: Python :: 3', + 'License :: OSI Approved :: MIT License', + "Operating System :: OS Independent" + ], +) diff --git a/test/pipelines-it-frameworkcontroller.yml b/test/pipelines/pipelines-it-frameworkcontroller.yml similarity index 100% rename from test/pipelines-it-frameworkcontroller.yml rename to test/pipelines/pipelines-it-frameworkcontroller.yml diff --git a/test/pipelines-it-installation.yml b/test/pipelines/pipelines-it-installation.yml similarity index 100% rename from test/pipelines-it-installation.yml rename to test/pipelines/pipelines-it-installation.yml diff --git a/test/pipelines-it-kubeflow.yml b/test/pipelines/pipelines-it-kubeflow.yml similarity index 100% rename from test/pipelines-it-kubeflow.yml rename to test/pipelines/pipelines-it-kubeflow.yml diff --git a/test/pipelines-it-local-windows.yml b/test/pipelines/pipelines-it-local-windows.yml similarity index 100% rename from test/pipelines-it-local-windows.yml rename to test/pipelines/pipelines-it-local-windows.yml diff --git a/test/pipelines-it-local.yml b/test/pipelines/pipelines-it-local.yml similarity index 75% rename from test/pipelines-it-local.yml rename to test/pipelines/pipelines-it-local.yml index ec95b05151..c95ec39bb8 100644 --- a/test/pipelines-it-local.yml +++ b/test/pipelines/pipelines-it-local.yml @@ -24,21 +24,17 @@ jobs: displayName: 'Unit test' - script: | cd test - PATH=$HOME/.local/bin:$PATH python3 naive_test.py + PATH=$HOME/.local/bin:$PATH python3 nni_test/nnitest/naive_test.py displayName: 
'Naive test' - script: | cd test - PATH=$HOME/.local/bin:$PATH python3 tuner_test.py + PATH=$HOME/.local/bin:$PATH python3 nni_test/nnitest/tuner_test.py displayName: 'Built-in tuners / assessors tests' - script: | cd test - PATH=$HOME/.local/bin:$PATH python3 config_test.py --ts local --local_gpu + PATH=$HOME/.local/bin:$PATH python3 nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts local displayName: 'Examples and advanced features tests on local machine' - script: | cd test - PATH=$HOME/.local/bin:$PATH python3 metrics_test.py - displayName: 'Trial job metrics test' - - script: | - cd test - PATH=$HOME/.local/bin:$PATH python3 cli_test.py + PATH=$HOME/.local/bin:$PATH python3 nni_test/nnitest/cli_test.py displayName: 'nnicli test' diff --git a/test/pipelines-it-pai-windows.yml b/test/pipelines/pipelines-it-pai-windows.yml similarity index 100% rename from test/pipelines-it-pai-windows.yml rename to test/pipelines/pipelines-it-pai-windows.yml diff --git a/test/pipelines-it-pai.yml b/test/pipelines/pipelines-it-pai.yml similarity index 100% rename from test/pipelines-it-pai.yml rename to test/pipelines/pipelines-it-pai.yml diff --git a/test/pipelines-it-paiYarn.yml b/test/pipelines/pipelines-it-paiYarn.yml similarity index 100% rename from test/pipelines-it-paiYarn.yml rename to test/pipelines/pipelines-it-paiYarn.yml diff --git a/test/pipelines-it-remote-windows.yml b/test/pipelines/pipelines-it-remote-windows.yml similarity index 100% rename from test/pipelines-it-remote-windows.yml rename to test/pipelines/pipelines-it-remote-windows.yml diff --git a/test/pipelines-it-remote.yml b/test/pipelines/pipelines-it-remote.yml similarity index 100% rename from test/pipelines-it-remote.yml rename to test/pipelines/pipelines-it-remote.yml diff --git a/test/it.sh b/test/scripts/it.sh similarity index 100% rename from test/it.sh rename to test/scripts/it.sh diff --git a/test/unittest.ps1 b/test/scripts/unittest.ps1 similarity index 100% rename from 
test/unittest.ps1 rename to test/scripts/unittest.ps1 diff --git a/test/unittest.sh b/test/scripts/unittest.sh old mode 100755 new mode 100644 similarity index 100% rename from test/unittest.sh rename to test/scripts/unittest.sh From 3c4b3d7e7d383e5a510ffed9f013efeb9a7fc8b3 Mon Sep 17 00:00:00 2001 From: chicm-ms <38930155+chicm-ms@users.noreply.github.com> Date: Wed, 18 Mar 2020 13:10:18 +0800 Subject: [PATCH 02/38] updates (#2166) --- test/pipelines/pipelines-it-local.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/pipelines/pipelines-it-local.yml b/test/pipelines/pipelines-it-local.yml index c95ec39bb8..3f7b0b07f5 100644 --- a/test/pipelines/pipelines-it-local.yml +++ b/test/pipelines/pipelines-it-local.yml @@ -20,7 +20,7 @@ jobs: displayName: 'Install dependencies for integration tests' - script: | cd test - source unittest.sh + source scripts/unittest.sh displayName: 'Unit test' - script: | cd test From a246a8c069f693864a6e0a6dd90ab576307b356a Mon Sep 17 00:00:00 2001 From: chicm-ms <38930155+chicm-ms@users.noreply.github.com> Date: Wed, 18 Mar 2020 13:28:46 +0800 Subject: [PATCH 03/38] update tuner test (#2167) --- test/nni_test/nnitest/tuner_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/nni_test/nnitest/tuner_test.py b/test/nni_test/nnitest/tuner_test.py index a044429fad..41581ea9fa 100644 --- a/test/nni_test/nnitest/tuner_test.py +++ b/test/nni_test/nnitest/tuner_test.py @@ -17,9 +17,9 @@ def get_config_file_path(): if sys.platform == 'win32': - config_file = osp.join('tuner_test', 'local_win32.yml') + config_file = osp.join('config', 'tuner_test', 'local_win32.yml') else: - config_file = osp.join('tuner_test', 'local.yml') + config_file = osp.join('config', 'tuner_test', 'local.yml') return config_file def switch(dispatch_type, dispatch_name): From ecd5893864cf50bebdb15129b29ea5ee74f6f78b Mon Sep 17 00:00:00 2001 From: chicm-ms <38930155+chicm-ms@users.noreply.github.com> Date: Wed, 18 Mar 
2020 17:12:51 +0800 Subject: [PATCH 04/38] Refactor integration tests (#2168) --- .../curvefitting.yml} | 10 +++--- .../medianstop.yml} | 32 ++++++++++--------- test/config/assessors/search_space.json | 7 ++++ test/config/assessors/trial.py | 25 +++++++++++++++ test/config/integration_tests.yml | 21 ++++++++++-- test/nni_test/nnitest/generate_ts_config.py | 2 -- test/nni_test/nnitest/run_tests.py | 18 ++++++++--- test/nni_test/nnitest/utils.py | 10 +++--- .../pipelines-it-frameworkcontroller.yml | 6 ++-- test/pipelines/pipelines-it-kubeflow.yml | 6 ++-- test/pipelines/pipelines-it-local-windows.yml | 24 +++----------- test/pipelines/pipelines-it-local.yml | 10 +----- test/pipelines/pipelines-it-pai-windows.yml | 5 ++- test/pipelines/pipelines-it-pai.yml | 6 ++-- .../pipelines/pipelines-it-remote-windows.yml | 6 ++-- test/pipelines/pipelines-it-remote.yml | 7 ++-- 16 files changed, 110 insertions(+), 85 deletions(-) rename test/config/{tuners/mnist-curvefitting.yml => assessors/curvefitting.yml} (72%) rename test/config/{tuners/mnist-metis.yml.tmp => assessors/medianstop.yml} (58%) create mode 100644 test/config/assessors/search_space.json create mode 100644 test/config/assessors/trial.py diff --git a/test/config/tuners/mnist-curvefitting.yml b/test/config/assessors/curvefitting.yml similarity index 72% rename from test/config/tuners/mnist-curvefitting.yml rename to test/config/assessors/curvefitting.yml index 194ffd3300..c6c0f393ca 100644 --- a/test/config/tuners/mnist-curvefitting.yml +++ b/test/config/assessors/curvefitting.yml @@ -1,8 +1,8 @@ authorName: nni experimentName: default_test maxExecDuration: 5m -maxTrialNum: 2 -trialConcurrency: 1 +maxTrialNum: 8 +trialConcurrency: 8 searchSpacePath: search_space.json tuner: @@ -17,12 +17,12 @@ assessor: start_step: 6 threshold: 0.95 trial: - codeDir: ../../../examples/trials/mnist-tfv1 - command: python3 mnist.py --batch_num 100 + codeDir: ./ + command: python3 trial.py gpuNum: 0 useAnnotation: false multiPhase: 
false multiThread: false -trainingServicePlatform: local \ No newline at end of file +trainingServicePlatform: local diff --git a/test/config/tuners/mnist-metis.yml.tmp b/test/config/assessors/medianstop.yml similarity index 58% rename from test/config/tuners/mnist-metis.yml.tmp rename to test/config/assessors/medianstop.yml index 91315d4032..2a2983d500 100644 --- a/test/config/tuners/mnist-metis.yml.tmp +++ b/test/config/assessors/medianstop.yml @@ -1,23 +1,25 @@ -assessor: - builtinAssessorName: Medianstop - classArgs: - optimize_mode: maximize authorName: nni experimentName: default_test -logCollection: http maxExecDuration: 5m -maxTrialNum: 2 -multiPhase: false -multiThread: false +maxTrialNum: 8 +trialConcurrency: 8 searchSpacePath: search_space.json -trainingServicePlatform: local -trial: - codeDir: ../../../examples/trials/mnist-tfv1 - command: python3 mnist.py --batch_num 100 - gpuNum: 0 -trialConcurrency: 1 + tuner: - builtinTunerName: MetisTuner + builtinTunerName: TPE classArgs: optimize_mode: maximize + +assessor: + builtinAssessorName: Medianstop + classArgs: + optimize_mode: maximize + +trial: + codeDir: ./ + command: python3 trial.py + gpuNum: 0 + useAnnotation: false + +trainingServicePlatform: local diff --git a/test/config/assessors/search_space.json b/test/config/assessors/search_space.json new file mode 100644 index 0000000000..0b7ef39ec3 --- /dev/null +++ b/test/config/assessors/search_space.json @@ -0,0 +1,7 @@ +{ + "test": + { + "_type" : "choice", + "_value" : [1, 100] + } +} diff --git a/test/config/assessors/trial.py b/test/config/assessors/trial.py new file mode 100644 index 0000000000..20cbd89f5a --- /dev/null +++ b/test/config/assessors/trial.py @@ -0,0 +1,25 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. 
+ +import random +import time +import nni + +if __name__ == '__main__': + print('trial start') + if random.random() > 0.5: + up = True + else: + up = False + v = 0.5 + nni.get_next_parameter() + for i in range(20): + time.sleep(1) + for _ in range(5): + if up: + v *= 1.1 + else: + v *= 0.9 + nni.report_intermediate_result(v) + nni.report_final_result(v) + print('trial done') diff --git a/test/config/integration_tests.yml b/test/config/integration_tests.yml index bcb6382aa3..92fd26c863 100644 --- a/test/config/integration_tests.yml +++ b/test/config/integration_tests.yml @@ -69,6 +69,15 @@ testCases: trialConcurrency: 1 validator: MetricsValidator +- name: nnicli + configFile: test/config/examples/sklearn-regression.yml + config: + maxTrialNum: 4 + trialConcurrency: 4 + launchCommand: python3 -c 'import nnicli as nc; nc.start_nni("$configFile")' + stopCommand: python3 -c 'import nnicli as nc; nc.stop_nni()' + validator: + # Experiment resume test part 1 - name: resume-1 configFile: test/config/examples/sklearn-regression.yml @@ -104,6 +113,15 @@ testCases: - name: multi-phase-tpe configFile: test/config/multi_phase/tpe.yml +######################################################################### +# nni assessor test +######################################################################### +- name: assessor-curvefitting + configFile: test/config/assessors/curvefitting.yml + +- name: assessor-medianstop + configFile: test/config/assessors/medianstop.yml + ######################################################################### # nni tuners test ######################################################################### @@ -128,9 +146,6 @@ testCases: - name: tuner-bohb configFile: test/config/tuners/mnist-bohb.yml -- name: tuner-curvefitting - configFile: test/config/tuners/mnist-curvefitting.yml - - name: tuner-gp configFile: test/config/tuners/mnist-gp.yml diff --git a/test/nni_test/nnitest/generate_ts_config.py b/test/nni_test/nnitest/generate_ts_config.py index 
a2d19e3aa7..ab5340b724 100644 --- a/test/nni_test/nnitest/generate_ts_config.py +++ b/test/nni_test/nnitest/generate_ts_config.py @@ -131,5 +131,3 @@ def convert_command(): args = parser.parse_args() update_training_service_config(args) - if args.ts == 'local': - convert_command() diff --git a/test/nni_test/nnitest/run_tests.py b/test/nni_test/nnitest/run_tests.py index 74faf6a6b4..5c37dfe378 100644 --- a/test/nni_test/nnitest/run_tests.py +++ b/test/nni_test/nnitest/run_tests.py @@ -1,17 +1,19 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. +import sys import os import argparse import subprocess import time +import shlex import traceback import json import torch import ruamel.yaml as yaml from utils import setup_experiment, get_experiment_status, get_yml_content, dump_yml_content, get_experiment_id, \ - parse_max_duration_time, get_succeeded_trial_num, deep_update, print_trial_job_log, get_failed_trial_jobs + parse_max_duration_time, get_trial_stats, deep_update, print_trial_job_log, get_failed_trial_jobs from utils import GREEN, RED, CLEAR, STATUS_URL, TRIAL_JOBS_URL, EXPERIMENT_URL, REST_ENDPOINT import validators @@ -53,6 +55,10 @@ def run_test_case(test_case_config, it_config, args): if test_case_config.get('config') is not None: deep_update(test_yml_config, test_case_config['config']) + # hack for windows + if sys.platform == 'win32': + test_yml_config['trial']['command'] = test_yml_config['trial']['command'].replace('python3', 'python') + # check GPU if test_yml_config['trial']['gpuNum'] > 0 and torch.cuda.device_count() < 1: print('skipping {}, gpu is not available'.format(test_case_config['name'])) @@ -76,7 +82,7 @@ def run_test_case(test_case_config, it_config, args): finally: print('Stop command:', test_case_config.get('stopCommand')) if test_case_config.get('stopCommand'): - subprocess.run(test_case_config.get('stopCommand').split(' ')) + subprocess.run(shlex.split(test_case_config.get('stopCommand'))) # remove tmp config 
file if os.path.exists(new_config_file): os.remove(new_config_file) @@ -99,7 +105,8 @@ def get_launch_command(test_case_config): def launch_test(config_file, training_service, test_case_config): '''run test per configuration file''' - proc = subprocess.run(get_launch_command(test_case_config).split(' ')) + proc = subprocess.run(shlex.split(get_launch_command(test_case_config))) + assert proc.returncode == 0, '`nnictl create` failed with code %d' % proc.returncode # set experiment ID into variable @@ -117,8 +124,9 @@ def launch_test(config_file, training_service, test_case_config): status = get_experiment_status(STATUS_URL) if status in ['DONE', 'ERROR'] or get_failed_trial_jobs(TRIAL_JOBS_URL): break - - if status != 'DONE' or get_succeeded_trial_num(TRIAL_JOBS_URL) < max_trial_num: + trial_stats = get_trial_stats(TRIAL_JOBS_URL) + print(json.dumps(trial_stats, indent=4), flush=True) + if status != 'DONE' or trial_stats['SUCCEEDED'] + trial_stats['EARLY_STOPPED'] < max_trial_num: print_trial_job_log(training_service, TRIAL_JOBS_URL) raise AssertionError('Failed to finish in maxExecDuration') diff --git a/test/nni_test/nnitest/utils.py b/test/nni_test/nnitest/utils.py index 9daa1952fb..31c2427d78 100644 --- a/test/nni_test/nnitest/utils.py +++ b/test/nni_test/nnitest/utils.py @@ -90,14 +90,12 @@ def get_experiment_status(status_url): nni_status = requests.get(status_url).json() return nni_status['status'] -def get_succeeded_trial_num(trial_jobs_url): +def get_trial_stats(trial_jobs_url): trial_jobs = requests.get(trial_jobs_url).json() - num_succeed = 0 + trial_stats = collections.defaultdict(int) for trial_job in trial_jobs: - if trial_job['status'] in ['SUCCEEDED', 'EARLY_STOPPED']: - num_succeed += 1 - print('num_succeed:', num_succeed) - return num_succeed + trial_stats[trial_job['status']] += 1 + return trial_stats def get_trial_jobs(trial_jobs_url, status=None): '''Return failed trial jobs''' diff --git a/test/pipelines/pipelines-it-frameworkcontroller.yml 
b/test/pipelines/pipelines-it-frameworkcontroller.yml index 7daf8a6feb..9e21054202 100644 --- a/test/pipelines/pipelines-it-frameworkcontroller.yml +++ b/test/pipelines/pipelines-it-frameworkcontroller.yml @@ -47,9 +47,9 @@ jobs: fi echo "TEST_IMG:$TEST_IMG" cd test - python3 generate_ts_config.py --ts frameworkcontroller --keyvault_vaultname $(keyVault_vaultName) --keyvault_name $(keyVault_name) \ + python3 nni_test/nnitest/generate_ts_config.py --ts frameworkcontroller --keyvault_vaultname $(keyVault_vaultName) --keyvault_name $(keyVault_name) \ --azs_account $(azureStorage_accountName) --azs_share $(azureStorage_azureShare) --nni_docker_image $TEST_IMG --nni_manager_ip $(nni_manager_ip) - cat training_service.yml - PATH=$HOME/.local/bin:$PATH python3 config_test.py --ts frameworkcontroller --exclude multi_phase + cat config/training_service.yml + PATH=$HOME/.local/bin:$PATH python3 nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts frameworkcontroller --exclude multi_phase displayName: 'integration test' diff --git a/test/pipelines/pipelines-it-kubeflow.yml b/test/pipelines/pipelines-it-kubeflow.yml index 660ac16b6a..e7c38861cc 100644 --- a/test/pipelines/pipelines-it-kubeflow.yml +++ b/test/pipelines/pipelines-it-kubeflow.yml @@ -47,9 +47,9 @@ jobs: fi echo "TEST_IMG:$TEST_IMG" cd test - python3 generate_ts_config.py --ts kubeflow --keyvault_vaultname $(keyVault_vaultName) --keyvault_name $(keyVault_name) \ + python3 nni_test/nnitest/generate_ts_config.py --ts kubeflow --keyvault_vaultname $(keyVault_vaultName) --keyvault_name $(keyVault_name) \ --azs_account $(azureStorage_accountName) --azs_share $(azureStorage_azureShare) --nni_docker_image $TEST_IMG --nni_manager_ip $(nni_manager_ip) - cat training_service.yml - PATH=$HOME/.local/bin:$PATH python3 config_test.py --ts kubeflow --exclude multi_phase + cat config/training_service.yml + PATH=$HOME/.local/bin:$PATH python3 nni_test/nnitest/run_tests.py --config config/integration_tests.yml 
--ts kubeflow --exclude multi_phase displayName: 'integration test' diff --git a/test/pipelines/pipelines-it-local-windows.yml b/test/pipelines/pipelines-it-local-windows.yml index a4a7b1d84e..3fe4d7f48d 100644 --- a/test/pipelines/pipelines-it-local-windows.yml +++ b/test/pipelines/pipelines-it-local-windows.yml @@ -14,29 +14,13 @@ jobs: displayName: 'Install dependencies for integration tests' - script: | cd test - python generate_ts_config.py --ts local - displayName: 'generate config files' - - script: | - cd test - python config_test.py --ts local --local_gpu --exclude smac,bohb - displayName: 'Examples and advanced features tests on local machine' - - script: | - cd test - powershell.exe -file unittest.ps1 + powershell.exe -file scripts/unittest.ps1 displayName: 'unit test' - script: | cd test - python naive_test.py + python nni_test/nnitest/naive_test.py displayName: 'Naive test' - script: | cd test - python tuner_test.py - displayName: 'Built-in tuners / assessors tests' - - script: | - cd test - python metrics_test.py - displayName: 'Trial job metrics test' - - script: | - cd test - PATH=$HOME/.local/bin:$PATH python3 cli_test.py - displayName: 'nnicli test' + python nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts local --exclude smac,bohb + displayName: 'Integration tests' diff --git a/test/pipelines/pipelines-it-local.yml b/test/pipelines/pipelines-it-local.yml index 3f7b0b07f5..fa3922f986 100644 --- a/test/pipelines/pipelines-it-local.yml +++ b/test/pipelines/pipelines-it-local.yml @@ -26,15 +26,7 @@ jobs: cd test PATH=$HOME/.local/bin:$PATH python3 nni_test/nnitest/naive_test.py displayName: 'Naive test' - - script: | - cd test - PATH=$HOME/.local/bin:$PATH python3 nni_test/nnitest/tuner_test.py - displayName: 'Built-in tuners / assessors tests' - script: | cd test PATH=$HOME/.local/bin:$PATH python3 nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts local - displayName: 'Examples and advanced features 
tests on local machine' - - script: | - cd test - PATH=$HOME/.local/bin:$PATH python3 nni_test/nnitest/cli_test.py - displayName: 'nnicli test' + displayName: 'Integration tests' diff --git a/test/pipelines/pipelines-it-pai-windows.yml b/test/pipelines/pipelines-it-pai-windows.yml index b814f84eb0..6aa5a24a31 100644 --- a/test/pipelines/pipelines-it-pai-windows.yml +++ b/test/pipelines/pipelines-it-pai-windows.yml @@ -63,7 +63,6 @@ jobs: cd test set PATH=$(ENV_PATH) python --version - python generate_ts_config.py --ts pai --pai_host $(pai_host) --pai_user $(pai_user) --pai_pwd $(pai_pwd) --vc $(pai_virtual_cluster) --nni_docker_image $(docker_image) --data_dir $(data_dir) --output_dir $(output_dir) --nni_manager_ip $(nni_manager_ip) - - python config_test.py --ts pai --exclude multi_phase,smac,bohb + python nni_test/nnitest/generate_ts_config.py --ts pai --pai_host $(pai_host) --pai_user $(pai_user) --pai_pwd $(pai_pwd) --vc $(pai_virtual_cluster) --nni_docker_image $(docker_image) --data_dir $(data_dir) --output_dir $(output_dir) --nni_manager_ip $(nni_manager_ip) + python nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts pai --exclude multi_phase,smac,bohb displayName: 'Examples and advanced features tests on pai' \ No newline at end of file diff --git a/test/pipelines/pipelines-it-pai.yml b/test/pipelines/pipelines-it-pai.yml index e52d835a17..41c10e788d 100644 --- a/test/pipelines/pipelines-it-pai.yml +++ b/test/pipelines/pipelines-it-pai.yml @@ -51,9 +51,7 @@ jobs: echo "TEST_IMG:$TEST_IMG" cd test - python3 generate_ts_config.py --ts pai --pai_host $(pai_host) --pai_user $(pai_user) --nni_docker_image $TEST_IMG --pai_storage_plugin $(pai_storage_plugin)\ + python3 nni_test/nnitest/generate_ts_config.py --ts pai --pai_host $(pai_host) --pai_user $(pai_user) --nni_docker_image $TEST_IMG --pai_storage_plugin $(pai_storage_plugin)\ --pai_token $(pai_token) --nni_manager_nfs_mount_path $(nni_manager_nfs_mount_path) --container_nfs_mount_path 
$(container_nfs_mount_path) --nni_manager_ip $(nni_manager_ip) - - PATH=$HOME/.local/bin:$PATH python3 config_test.py --ts pai --exclude multi_phase - PATH=$HOME/.local/bin:$PATH python3 metrics_test.py + PATH=$HOME/.local/bin:$PATH python3 nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts pai --exclude multi_phase displayName: 'integration test' diff --git a/test/pipelines/pipelines-it-remote-windows.yml b/test/pipelines/pipelines-it-remote-windows.yml index f5feb2b43e..be24c990c6 100644 --- a/test/pipelines/pipelines-it-remote-windows.yml +++ b/test/pipelines/pipelines-it-remote-windows.yml @@ -42,9 +42,9 @@ jobs: displayName: 'Get docker port' - powershell: | cd test - python generate_ts_config.py --ts remote --remote_user $(docker_user) --remote_host $(remote_host) --remote_port $(Get-Content port) --remote_pwd $(docker_pwd) --nni_manager_ip $(nni_manager_ip) - Get-Content training_service.yml - python config_test.py --ts remote --exclude cifar10,smac,bohb + python nni_test/nnitest/generate_ts_config.py --ts remote --remote_user $(docker_user) --remote_host $(remote_host) --remote_port $(Get-Content port) --remote_pwd $(docker_pwd) --nni_manager_ip $(nni_manager_ip) + Get-Content config/training_service.yml + python nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts remote --exclude cifar10,smac,bohb displayName: 'integration test' - task: SSH@0 inputs: diff --git a/test/pipelines/pipelines-it-remote.yml b/test/pipelines/pipelines-it-remote.yml index 03bc177ed9..f4db589722 100644 --- a/test/pipelines/pipelines-it-remote.yml +++ b/test/pipelines/pipelines-it-remote.yml @@ -53,11 +53,10 @@ jobs: - script: | set -e cd test - python3 generate_ts_config.py --ts remote --remote_user $(docker_user) --remote_host $(remote_host) \ + python3 nni_test/nnitest/generate_ts_config.py --ts remote --remote_user $(docker_user) --remote_host $(remote_host) \ --remote_port $(cat port) --remote_pwd $(docker_pwd) --nni_manager_ip 
$(nni_manager_ip) - cat training_service.yml - PATH=$HOME/.local/bin:$PATH python3 config_test.py --ts remote --exclude cifar10 - PATH=$HOME/.local/bin:$PATH python3 metrics_test.py + cat config/training_service.yml + PATH=$HOME/.local/bin:$PATH python3 nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts remote displayName: 'integration test' - task: SSH@0 inputs: From c7c2f2eceecdfbd453a4657dd8d4d18f8a331905 Mon Sep 17 00:00:00 2001 From: chicm-ms <38930155+chicm-ms@users.noreply.github.com> Date: Wed, 18 Mar 2020 17:37:58 +0800 Subject: [PATCH 05/38] update remote pipeline (#2169) --- test/pipelines/pipelines-it-remote-windows.yml | 2 +- test/pipelines/pipelines-it-remote.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/pipelines/pipelines-it-remote-windows.yml b/test/pipelines/pipelines-it-remote-windows.yml index be24c990c6..481a31353e 100644 --- a/test/pipelines/pipelines-it-remote-windows.yml +++ b/test/pipelines/pipelines-it-remote-windows.yml @@ -29,7 +29,7 @@ jobs: inputs: sshEndpoint: $(end_point) runOptions: commands - commands: python3 /tmp/nnitest/$(Build.BuildId)/nni-remote/test/remote_docker.py --mode start --name $(Build.BuildId) --image nni/nni --os windows + commands: python3 /tmp/nnitest/$(Build.BuildId)/nni-remote/test/nni_test/nnitest/remote_docker.py --mode start --name $(Build.BuildId) --image nni/nni --os windows displayName: 'Start docker' - powershell: | Write-Host "Downloading Putty..." 
diff --git a/test/pipelines/pipelines-it-remote.yml b/test/pipelines/pipelines-it-remote.yml index f4db589722..c8abae058f 100644 --- a/test/pipelines/pipelines-it-remote.yml +++ b/test/pipelines/pipelines-it-remote.yml @@ -39,7 +39,7 @@ jobs: inputs: sshEndpoint: $(end_point) runOptions: commands - commands: python3 /tmp/nnitest/$(Build.BuildId)/test/remote_docker.py --mode start --name $(Build.BuildId) --image nni/nni + commands: python3 /tmp/nnitest/$(Build.BuildId)/test/nni_test/nnitest/remote_docker.py --mode start --name $(Build.BuildId) --image nni/nni displayName: 'Start docker' - task: DownloadSecureFile@1 inputs: From 0c04e78dd724c52274047e05ffc24e90b83c9a83 Mon Sep 17 00:00:00 2001 From: chicm-ms <38930155+chicm-ms@users.noreply.github.com> Date: Wed, 18 Mar 2020 18:01:15 +0800 Subject: [PATCH 06/38] add torch (#2170) --- test/pipelines/pipelines-it-frameworkcontroller.yml | 1 + test/pipelines/pipelines-it-kubeflow.yml | 1 + test/pipelines/pipelines-it-pai-windows.yml | 1 + test/pipelines/pipelines-it-pai.yml | 1 + test/pipelines/pipelines-it-remote-windows.yml | 1 + test/pipelines/pipelines-it-remote.yml | 1 + 6 files changed, 6 insertions(+) diff --git a/test/pipelines/pipelines-it-frameworkcontroller.yml b/test/pipelines/pipelines-it-frameworkcontroller.yml index 9e21054202..57762f4585 100644 --- a/test/pipelines/pipelines-it-frameworkcontroller.yml +++ b/test/pipelines/pipelines-it-frameworkcontroller.yml @@ -22,6 +22,7 @@ jobs: displayName: 'Install nni toolkit via source code' - script: | + python3 -m pip install torch==1.2.0 --user sudo apt-get install swig -y PATH=$HOME/.local/bin:$PATH nnictl package install --name=SMAC PATH=$HOME/.local/bin:$PATH nnictl package install --name=BOHB diff --git a/test/pipelines/pipelines-it-kubeflow.yml b/test/pipelines/pipelines-it-kubeflow.yml index e7c38861cc..a77c1274f4 100644 --- a/test/pipelines/pipelines-it-kubeflow.yml +++ b/test/pipelines/pipelines-it-kubeflow.yml @@ -22,6 +22,7 @@ jobs: displayName: 
'Install nni toolkit via source code' - script: | + python3 -m pip install torch==1.2.0 --user sudo apt-get install swig -y PATH=$HOME/.local/bin:$PATH nnictl package install --name=SMAC PATH=$HOME/.local/bin:$PATH nnictl package install --name=BOHB diff --git a/test/pipelines/pipelines-it-pai-windows.yml b/test/pipelines/pipelines-it-pai-windows.yml index 6aa5a24a31..0617897989 100644 --- a/test/pipelines/pipelines-it-pai-windows.yml +++ b/test/pipelines/pipelines-it-pai-windows.yml @@ -58,6 +58,7 @@ jobs: - script: | set PATH=$(ENV_PATH) python -m pip install scikit-learn==0.21.0 --user + python -m pip install torch===1.2.0 -f https://download.pytorch.org/whl/torch_stable.html --user displayName: 'Install dependencies for integration tests' - script: | cd test diff --git a/test/pipelines/pipelines-it-pai.yml b/test/pipelines/pipelines-it-pai.yml index 41c10e788d..7f05c768fd 100644 --- a/test/pipelines/pipelines-it-pai.yml +++ b/test/pipelines/pipelines-it-pai.yml @@ -22,6 +22,7 @@ jobs: displayName: 'Install nni toolkit via source code' - script: | + python3 -m pip install torch==1.2.0 --user sudo apt-get install swig -y PATH=$HOME/.local/bin:$PATH nnictl package install --name=SMAC PATH=$HOME/.local/bin:$PATH nnictl package install --name=BOHB diff --git a/test/pipelines/pipelines-it-remote-windows.yml b/test/pipelines/pipelines-it-remote-windows.yml index 481a31353e..e450a5c592 100644 --- a/test/pipelines/pipelines-it-remote-windows.yml +++ b/test/pipelines/pipelines-it-remote-windows.yml @@ -17,6 +17,7 @@ jobs: displayName: 'Install nni toolkit via source code' - script: | python -m pip install scikit-learn==0.20.1 --user + python -m pip install torch===1.2.0 -f https://download.pytorch.org/whl/torch_stable.html --user displayName: 'Install dependencies for integration tests' - task: SSH@0 inputs: diff --git a/test/pipelines/pipelines-it-remote.yml b/test/pipelines/pipelines-it-remote.yml index c8abae058f..03165c1213 100644 --- 
a/test/pipelines/pipelines-it-remote.yml +++ b/test/pipelines/pipelines-it-remote.yml @@ -15,6 +15,7 @@ jobs: source install.sh displayName: 'Install nni toolkit via source code' - script: | + python3 -m pip install torch==1.2.0 --user sudo apt-get install swig -y PATH=$HOME/.local/bin:$PATH nnictl package install --name=SMAC PATH=$HOME/.local/bin:$PATH nnictl package install --name=BOHB From e39958f13651746b42725f314d727a253ac54de8 Mon Sep 17 00:00:00 2001 From: chicm-ms <38930155+chicm-ms@users.noreply.github.com> Date: Wed, 18 Mar 2020 20:22:57 +0800 Subject: [PATCH 07/38] remove torch (#2172) --- test/nni_test/nnitest/run_tests.py | 6 ------ test/pipelines/pipelines-it-frameworkcontroller.yml | 1 - test/pipelines/pipelines-it-kubeflow.yml | 1 - test/pipelines/pipelines-it-pai-windows.yml | 1 - test/pipelines/pipelines-it-pai.yml | 1 - test/pipelines/pipelines-it-remote-windows.yml | 1 - test/pipelines/pipelines-it-remote.yml | 1 - 7 files changed, 12 deletions(-) diff --git a/test/nni_test/nnitest/run_tests.py b/test/nni_test/nnitest/run_tests.py index 5c37dfe378..d387f814a2 100644 --- a/test/nni_test/nnitest/run_tests.py +++ b/test/nni_test/nnitest/run_tests.py @@ -9,7 +9,6 @@ import shlex import traceback import json -import torch import ruamel.yaml as yaml from utils import setup_experiment, get_experiment_status, get_yml_content, dump_yml_content, get_experiment_id, \ @@ -59,11 +58,6 @@ def run_test_case(test_case_config, it_config, args): if sys.platform == 'win32': test_yml_config['trial']['command'] = test_yml_config['trial']['command'].replace('python3', 'python') - # check GPU - if test_yml_config['trial']['gpuNum'] > 0 and torch.cuda.device_count() < 1: - print('skipping {}, gpu is not available'.format(test_case_config['name'])) - return - # generate temporary config yml file to launch experiment new_config_file = config_path + '.tmp' dump_yml_content(new_config_file, test_yml_config) diff --git a/test/pipelines/pipelines-it-frameworkcontroller.yml 
b/test/pipelines/pipelines-it-frameworkcontroller.yml index 57762f4585..9e21054202 100644 --- a/test/pipelines/pipelines-it-frameworkcontroller.yml +++ b/test/pipelines/pipelines-it-frameworkcontroller.yml @@ -22,7 +22,6 @@ jobs: displayName: 'Install nni toolkit via source code' - script: | - python3 -m pip install torch==1.2.0 --user sudo apt-get install swig -y PATH=$HOME/.local/bin:$PATH nnictl package install --name=SMAC PATH=$HOME/.local/bin:$PATH nnictl package install --name=BOHB diff --git a/test/pipelines/pipelines-it-kubeflow.yml b/test/pipelines/pipelines-it-kubeflow.yml index a77c1274f4..e7c38861cc 100644 --- a/test/pipelines/pipelines-it-kubeflow.yml +++ b/test/pipelines/pipelines-it-kubeflow.yml @@ -22,7 +22,6 @@ jobs: displayName: 'Install nni toolkit via source code' - script: | - python3 -m pip install torch==1.2.0 --user sudo apt-get install swig -y PATH=$HOME/.local/bin:$PATH nnictl package install --name=SMAC PATH=$HOME/.local/bin:$PATH nnictl package install --name=BOHB diff --git a/test/pipelines/pipelines-it-pai-windows.yml b/test/pipelines/pipelines-it-pai-windows.yml index 0617897989..6aa5a24a31 100644 --- a/test/pipelines/pipelines-it-pai-windows.yml +++ b/test/pipelines/pipelines-it-pai-windows.yml @@ -58,7 +58,6 @@ jobs: - script: | set PATH=$(ENV_PATH) python -m pip install scikit-learn==0.21.0 --user - python -m pip install torch===1.2.0 -f https://download.pytorch.org/whl/torch_stable.html --user displayName: 'Install dependencies for integration tests' - script: | cd test diff --git a/test/pipelines/pipelines-it-pai.yml b/test/pipelines/pipelines-it-pai.yml index 7f05c768fd..41c10e788d 100644 --- a/test/pipelines/pipelines-it-pai.yml +++ b/test/pipelines/pipelines-it-pai.yml @@ -22,7 +22,6 @@ jobs: displayName: 'Install nni toolkit via source code' - script: | - python3 -m pip install torch==1.2.0 --user sudo apt-get install swig -y PATH=$HOME/.local/bin:$PATH nnictl package install --name=SMAC PATH=$HOME/.local/bin:$PATH nnictl 
package install --name=BOHB diff --git a/test/pipelines/pipelines-it-remote-windows.yml b/test/pipelines/pipelines-it-remote-windows.yml index e450a5c592..481a31353e 100644 --- a/test/pipelines/pipelines-it-remote-windows.yml +++ b/test/pipelines/pipelines-it-remote-windows.yml @@ -17,7 +17,6 @@ jobs: displayName: 'Install nni toolkit via source code' - script: | python -m pip install scikit-learn==0.20.1 --user - python -m pip install torch===1.2.0 -f https://download.pytorch.org/whl/torch_stable.html --user displayName: 'Install dependencies for integration tests' - task: SSH@0 inputs: diff --git a/test/pipelines/pipelines-it-remote.yml b/test/pipelines/pipelines-it-remote.yml index 03165c1213..c8abae058f 100644 --- a/test/pipelines/pipelines-it-remote.yml +++ b/test/pipelines/pipelines-it-remote.yml @@ -15,7 +15,6 @@ jobs: source install.sh displayName: 'Install nni toolkit via source code' - script: | - python3 -m pip install torch==1.2.0 --user sudo apt-get install swig -y PATH=$HOME/.local/bin:$PATH nnictl package install --name=SMAC PATH=$HOME/.local/bin:$PATH nnictl package install --name=BOHB From 9755275243783b991bff09122de303ba4d6ad267 Mon Sep 17 00:00:00 2001 From: chicm-ms <38930155+chicm-ms@users.noreply.github.com> Date: Wed, 18 Mar 2020 21:50:11 +0800 Subject: [PATCH 08/38] Model compression test (#2175) --- test/pipelines/pipelines-it-local.yml | 6 +++- test/scripts/model_compression.sh | 44 +++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 1 deletion(-) create mode 100644 test/scripts/model_compression.sh diff --git a/test/pipelines/pipelines-it-local.yml b/test/pipelines/pipelines-it-local.yml index fa3922f986..b3dca9243c 100644 --- a/test/pipelines/pipelines-it-local.yml +++ b/test/pipelines/pipelines-it-local.yml @@ -29,4 +29,8 @@ jobs: - script: | cd test PATH=$HOME/.local/bin:$PATH python3 nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts local - displayName: 'Integration tests' + displayName: 'Integration 
test' + - script: | + cd test + source scripts/model_compression.sh + displayName: 'Model compression test' diff --git a/test/scripts/model_compression.sh b/test/scripts/model_compression.sh new file mode 100644 index 0000000000..ade720f86c --- /dev/null +++ b/test/scripts/model_compression.sh @@ -0,0 +1,44 @@ +#!/bin/bash +set -e +CWD=${PWD} + +echo "" +echo "===========================Testing: pruning and speedup===========================" +cd ${CWD}/../examples/model_compress + +echo "testing slim pruning and speedup..." +python3 model_prune_torch.py --pruner_name slim --pretrain_epochs 1 --prune_epochs 1 +python3 model_speedup.py --example_name slim --model_checkpoint ./checkpoints/pruned_vgg19_cifar10_slim.pth \ + --masks_file ./checkpoints/mask_vgg19_cifar10_slim.pth + +echo "testing l1 pruning and speedup..." +python3 model_prune_torch.py --pruner_name l1 --pretrain_epochs 1 --prune_epochs 1 +python3 model_speedup.py --example_name l1filter --model_checkpoint ./checkpoints/pruned_vgg16_cifar10_l1.pth \ + --masks_file ./checkpoints/mask_vgg16_cifar10_l1.pth + +echo "testing apoz pruning and speedup..." +python3 model_prune_torch.py --pruner_name apoz --pretrain_epochs 1 --prune_epochs 1 +python3 model_speedup.py --example_name apoz --model_checkpoint ./checkpoints/pruned_vgg16_cifar10_apoz.pth \ + --masks_file ./checkpoints/mask_vgg16_cifar10_apoz.pth + +for name in level fpgm mean_activation +do + echo "testing $name pruning..." + python3 model_prune_torch.py --pruner_name $name --pretrain_epochs 1 --prune_epochs 1 +done + +echo "testing lottery ticket pruning..." +python3 lottery_torch_mnist_fc.py + +echo "" +echo "===========================Testing: quantizers===========================" +cd ${CWD}/../examples/model_compress + +echo "testing QAT quantizer..." +python3 QAT_torch_quantizer.py + +echo "testing DoReFa quantizer..." +python3 DoReFaQuantizer_torch_mnist.py + +echo "testing BNN quantizer..." 
+python3 BNN_quantizer_cifar10.py From c358adc13a438bc350072bc3710299fb4bda10f5 Mon Sep 17 00:00:00 2001 From: chicm-ms <38930155+chicm-ms@users.noreply.github.com> Date: Wed, 18 Mar 2020 23:32:46 +0800 Subject: [PATCH 09/38] Remove unused files (#2176) --- test/config/integration_tests.yml | 11 +- .../tuner_test/batchtuner_search_space.json | 8 -- test/config/tuner_test/local.yml | 20 --- test/config/tuner_test/local_win32.yml | 20 --- test/config/tuner_test/naive_trial.py | 10 -- test/config/tuner_test/search_space.json | 7 - test/nni_test/nnitest/cli_test.py | 39 ------ test/nni_test/nnitest/config_test.py | 124 ------------------ test/nni_test/nnitest/generate_ts_config.py | 12 -- test/nni_test/nnitest/metrics_test.py | 82 ------------ test/nni_test/nnitest/tuner_test.py | 88 ------------- test/nni_test/nnitest/utils.py | 11 +- test/nni_test/nnitest/validators.py | 17 ++- test/pipelines/pipelines-it-local-windows.yml | 2 +- test/pipelines/pipelines-it-local.yml | 4 +- 15 files changed, 30 insertions(+), 425 deletions(-) delete mode 100644 test/config/tuner_test/batchtuner_search_space.json delete mode 100644 test/config/tuner_test/local.yml delete mode 100644 test/config/tuner_test/local_win32.yml delete mode 100644 test/config/tuner_test/naive_trial.py delete mode 100644 test/config/tuner_test/search_space.json delete mode 100644 test/nni_test/nnitest/cli_test.py delete mode 100644 test/nni_test/nnitest/config_test.py delete mode 100644 test/nni_test/nnitest/metrics_test.py delete mode 100644 test/nni_test/nnitest/tuner_test.py diff --git a/test/config/integration_tests.yml b/test/config/integration_tests.yml index 92fd26c863..9d52bee66a 100644 --- a/test/config/integration_tests.yml +++ b/test/config/integration_tests.yml @@ -76,18 +76,23 @@ testCases: trialConcurrency: 4 launchCommand: python3 -c 'import nnicli as nc; nc.start_nni("$configFile")' stopCommand: python3 -c 'import nnicli as nc; nc.stop_nni()' - validator: + validator: NnicliValidator # 
Experiment resume test part 1 -- name: resume-1 +- name: nnictl-resume-1 configFile: test/config/examples/sklearn-regression.yml setExperimentIdtoVar: $resumeExpId # Experiment resume test part 2 -- name: resume-2 +- name: nnictl-resume-2 configFile: test/config/examples/sklearn-regression.yml launchCommand: nnictl resume $resumeExpId +# Experiment view test +- name: nnictl-view + configFile: test/config/examples/sklearn-regression.yml + launchCommand: nnictl view $resumeExpId + - name: multi-thread configFile: test/config/multi_thread/config.yml diff --git a/test/config/tuner_test/batchtuner_search_space.json b/test/config/tuner_test/batchtuner_search_space.json deleted file mode 100644 index 5d3beeee85..0000000000 --- a/test/config/tuner_test/batchtuner_search_space.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "combine_params": - { - "_type" : "choice", - "_value" : [{"x": 1}, - {"x": 100}] - } -} diff --git a/test/config/tuner_test/local.yml b/test/config/tuner_test/local.yml deleted file mode 100644 index e5598914ad..0000000000 --- a/test/config/tuner_test/local.yml +++ /dev/null @@ -1,20 +0,0 @@ -assessor: - builtinAssessorName: Medianstop - classArgs: - optimize_mode: maximize -authorName: nni -experimentName: test_sdk -maxExecDuration: 1h -maxTrialNum: 2 -searchSpacePath: search_space.json -trainingServicePlatform: local -trial: - codeDir: . 
- command: python3 naive_trial.py - gpuNum: 0 -trialConcurrency: 2 -tuner: - builtinTunerName: Evolution - classArgs: - optimize_mode: maximize -useAnnotation: false diff --git a/test/config/tuner_test/local_win32.yml b/test/config/tuner_test/local_win32.yml deleted file mode 100644 index 5cd144a789..0000000000 --- a/test/config/tuner_test/local_win32.yml +++ /dev/null @@ -1,20 +0,0 @@ -assessor: - builtinAssessorName: Medianstop - classArgs: - optimize_mode: maximize -authorName: nni -experimentName: test_sdk -maxExecDuration: 1h -maxTrialNum: 2 -searchSpacePath: search_space.json -trainingServicePlatform: local -trial: - codeDir: . - command: python naive_trial.py - gpuNum: 0 -trialConcurrency: 2 -tuner: - builtinTunerName: Evolution - classArgs: - optimize_mode: maximize -useAnnotation: false diff --git a/test/config/tuner_test/naive_trial.py b/test/config/tuner_test/naive_trial.py deleted file mode 100644 index ba71dedff2..0000000000 --- a/test/config/tuner_test/naive_trial.py +++ /dev/null @@ -1,10 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT license. - -import nni - -params = nni.get_next_parameter() -print('params:', params) -x = params['x'] - -nni.report_final_result(x) diff --git a/test/config/tuner_test/search_space.json b/test/config/tuner_test/search_space.json deleted file mode 100644 index f20e76e0c5..0000000000 --- a/test/config/tuner_test/search_space.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "x": - { - "_type" : "choice", - "_value" : [1, 100] - } -} diff --git a/test/nni_test/nnitest/cli_test.py b/test/nni_test/nnitest/cli_test.py deleted file mode 100644 index 9ba5c2bcc3..0000000000 --- a/test/nni_test/nnitest/cli_test.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT license. 
- -import sys -import time -import traceback -from utils import GREEN, RED, CLEAR, setup_experiment - -def test_nni_cli(): - import nnicli as nc - - config_file = 'config_test/examples/mnist-tfv1.test.yml' - - try: - # Sleep here to make sure previous stopped exp has enough time to exit to avoid port conflict - time.sleep(6) - print(GREEN + 'Testing nnicli:' + config_file + CLEAR) - nc.start_nni(config_file) - time.sleep(3) - nc.set_endpoint('http://localhost:8080') - print(nc.version()) - print(nc.get_job_statistics()) - print(nc.get_experiment_status()) - nc.list_trial_jobs() - - print(GREEN + 'Test nnicli {}: TEST PASS'.format(config_file) + CLEAR) - except Exception as error: - print(RED + 'Test nnicli {}: TEST FAIL'.format(config_file) + CLEAR) - print('%r' % error) - traceback.print_exc() - raise error - finally: - nc.stop_nni() - -if __name__ == '__main__': - installed = (sys.argv[-1] != '--preinstall') - setup_experiment(installed) - - test_nni_cli() diff --git a/test/nni_test/nnitest/config_test.py b/test/nni_test/nnitest/config_test.py deleted file mode 100644 index c946ad6c46..0000000000 --- a/test/nni_test/nnitest/config_test.py +++ /dev/null @@ -1,124 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT license. - -import os -import argparse -import glob -import subprocess -import time -import traceback -import json - -from utils import setup_experiment, get_experiment_status, get_yml_content, dump_yml_content, \ - parse_max_duration_time, get_succeeded_trial_num, deep_update, print_failed_job_log, get_failed_trial_jobs -from utils import GREEN, RED, CLEAR, STATUS_URL, TRIAL_JOBS_URL - -def gen_new_config(config_file, training_service='local'): - ''' - Generates temporary config file for integration test, the file - should be deleted after testing. 
- ''' - config = get_yml_content(config_file) - new_config_file = config_file + '.tmp' - - it_config = get_yml_content('training_service.yml') - - # hack for kubeflow trial config - if training_service == 'kubeflow': - it_config[training_service]['trial']['worker']['command'] = config['trial']['command'] - config['trial'].pop('command') - if 'gpuNum' in config['trial']: - config['trial'].pop('gpuNum') - - if training_service == 'frameworkcontroller': - it_config[training_service]['trial']['taskRoles'][0]['command'] = config['trial']['command'] - config['trial'].pop('command') - if 'gpuNum' in config['trial']: - config['trial'].pop('gpuNum') - - deep_update(config, it_config['all']) - deep_update(config, it_config[training_service]) - - dump_yml_content(new_config_file, config) - - return new_config_file, config - -def run_test(config_file, training_service, local_gpu=False): - '''run test per configuration file''' - - new_config_file, config = gen_new_config(config_file, training_service) - print(json.dumps(config, sort_keys=True, indent=4)) - - if training_service == 'local' and not local_gpu and config['trial']['gpuNum'] > 0: - print('no gpu, skiping: ', config_file) - return - - try: - proc = subprocess.run(['nnictl', 'create', '--config', new_config_file]) - assert proc.returncode == 0, '`nnictl create` failed with code %d' % proc.returncode - - max_duration, max_trial_num = get_max_values(new_config_file) - sleep_interval = 3 - - for _ in range(0, max_duration+30, sleep_interval): - time.sleep(sleep_interval) - status = get_experiment_status(STATUS_URL) - if status in ['DONE', 'ERROR'] or get_failed_trial_jobs(TRIAL_JOBS_URL): - break - - print_failed_job_log(config['trainingServicePlatform'], TRIAL_JOBS_URL) - if status != 'DONE' or get_succeeded_trial_num(TRIAL_JOBS_URL) < max_trial_num: - raise AssertionError('Failed to finish in maxExecDuration') - finally: - if os.path.exists(new_config_file): - os.remove(new_config_file) - -def 
get_max_values(config_file): - '''Get maxExecDuration and maxTrialNum of experiment''' - experiment_config = get_yml_content(config_file) - return parse_max_duration_time(experiment_config['maxExecDuration']), experiment_config['maxTrialNum'] - -def run(args): - '''test all configuration files''' - if args.config is None: - assert args.config_dir is not None - config_files = glob.glob('{}/**/*.test.yml'.format(args.config_dir)) - else: - config_files = args.config.split(',') - - if args.exclude is not None: - exclude_paths = args.exclude.split(',') - if exclude_paths: - for exclude_path in exclude_paths: - config_files = [x for x in config_files if exclude_path not in x] - print(config_files) - - for config_file in config_files: - try: - # sleep 5 seconds here, to make sure previous stopped exp has enough time to exit to avoid port conflict - time.sleep(5) - print(GREEN + 'Testing:' + config_file + CLEAR) - begin_time = time.time() - run_test(config_file, args.ts, args.local_gpu) - print(GREEN + 'Test %s: TEST PASS IN %d mins' % (config_file, (time.time() - begin_time)/60) + CLEAR) - except Exception as error: - print(RED + 'Test %s: TEST FAIL' % (config_file) + CLEAR) - print('%r' % error) - traceback.print_exc() - raise error - finally: - subprocess.run(['nnictl', 'stop']) - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument("--config", type=str, default=None) - parser.add_argument("--config_dir", type=str, default=None) - parser.add_argument("--exclude", type=str, default=None) - parser.add_argument("--ts", type=str, choices=['local', 'remote', 'pai', 'paiYarn', 'kubeflow', 'frameworkcontroller'], default='local') - parser.add_argument("--local_gpu", action='store_true') - parser.add_argument("--preinstall", action='store_true') - args = parser.parse_args() - - setup_experiment(args.preinstall) - - run(args) diff --git a/test/nni_test/nnitest/generate_ts_config.py b/test/nni_test/nnitest/generate_ts_config.py index 
ab5340b724..b76c90d4af 100644 --- a/test/nni_test/nnitest/generate_ts_config.py +++ b/test/nni_test/nnitest/generate_ts_config.py @@ -87,18 +87,6 @@ def update_training_service_config(args): dump_yml_content(TRAINING_SERVICE_FILE, config) -def convert_command(): - '''convert command by platform''' - if sys.platform != 'win32': - return None - config_files = glob.glob('./**/*.yml') + glob.glob('./**/**/*.yml') - for config_file in config_files: - print('processing {}'.format(config_file)) - yml_content = get_yml_content(config_file) - if yml_content.get('trial'): - if yml_content['trial'].get('command'): - yml_content['trial']['command'] = yml_content['trial']['command'].replace('python3', 'python') - dump_yml_content(config_file, yml_content) if __name__ == '__main__': parser = argparse.ArgumentParser() diff --git a/test/nni_test/nnitest/metrics_test.py b/test/nni_test/nnitest/metrics_test.py deleted file mode 100644 index c899bf296a..0000000000 --- a/test/nni_test/nnitest/metrics_test.py +++ /dev/null @@ -1,82 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT license. - -import sys -import os.path as osp -import subprocess -import time -import traceback -import json -import requests - -from utils import get_experiment_status, get_yml_content, parse_max_duration_time, get_succeeded_trial_num, print_failed_job_log -from utils import GREEN, RED, CLEAR, STATUS_URL, TRIAL_JOBS_URL, METRICS_URL - -def run_test(): - '''run metrics test''' - if sys.platform == 'win32': - config_file = osp.join('metrics_test', 'metrics_win32.test.yml') - else: - config_file = osp.join('metrics_test', 'metrics.test.yml') - - print('Testing %s...' 
% config_file) - proc = subprocess.run(['nnictl', 'create', '--config', config_file]) - assert proc.returncode == 0, '`nnictl create` failed with code %d' % proc.returncode - - max_duration, max_trial_num = get_max_values(config_file) - sleep_interval = 3 - - for _ in range(0, max_duration, sleep_interval): - time.sleep(sleep_interval) - status = get_experiment_status(STATUS_URL) - #print('experiment status:', status) - if status == 'DONE': - num_succeeded = get_succeeded_trial_num(TRIAL_JOBS_URL) - print_failed_job_log('local', TRIAL_JOBS_URL) - if sys.platform == "win32": - time.sleep(sleep_interval) # Windows seems to have some issues on updating in time - assert num_succeeded == max_trial_num, 'only %d succeeded trial jobs, there should be %d' % (num_succeeded, max_trial_num) - check_metrics() - break - - assert status == 'DONE', 'Failed to finish in maxExecDuration' - -def check_metrics(): - with open(osp.join('metrics_test', 'expected_metrics.json'), 'r') as f: - expected_metrics = json.load(f) - print(expected_metrics) - metrics = requests.get(METRICS_URL).json() - intermediate_result, final_result = get_metric_results(metrics) - assert len(final_result) == 1, 'there should be 1 final result' - assert final_result[0] == expected_metrics['final_result'] - assert set(intermediate_result) == set(expected_metrics['intermediate_result']) - -def get_metric_results(metrics): - intermediate_result = [] - final_result = [] - for metric in metrics: - if metric['type'] == 'PERIODICAL': - intermediate_result.append(json.loads(metric['data'])) - elif metric['type'] == 'FINAL': - final_result.append(json.loads(metric['data'])) - print(intermediate_result, final_result) - - return [round(float(x),6) for x in intermediate_result], [round(float(x), 6) for x in final_result] - -def get_max_values(config_file): - experiment_config = get_yml_content(config_file) - return parse_max_duration_time(experiment_config['maxExecDuration']), experiment_config['maxTrialNum'] - -if 
__name__ == '__main__': - try: - # sleep 5 seconds here, to make sure previous stopped exp has enough time to exit to avoid port conflict - time.sleep(5) - run_test() - print(GREEN + 'TEST PASS' + CLEAR) - except Exception as error: - print(RED + 'TEST FAIL' + CLEAR) - print('%r' % error) - traceback.print_exc() - raise error - finally: - subprocess.run(['nnictl', 'stop']) diff --git a/test/nni_test/nnitest/tuner_test.py b/test/nni_test/nnitest/tuner_test.py deleted file mode 100644 index 41581ea9fa..0000000000 --- a/test/nni_test/nnitest/tuner_test.py +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT license. - -import sys -import os.path as osp -import subprocess -import sys -import time -import traceback - -from utils import get_yml_content, dump_yml_content, setup_experiment, get_nni_log_path, is_experiment_done -from utils import GREEN, RED, CLEAR, EXPERIMENT_URL - -TUNER_LIST = ['GridSearch', 'BatchTuner', 'TPE', 'Random', 'Anneal', 'Evolution'] -ASSESSOR_LIST = ['Medianstop'] - - -def get_config_file_path(): - if sys.platform == 'win32': - config_file = osp.join('config', 'tuner_test', 'local_win32.yml') - else: - config_file = osp.join('config', 'tuner_test', 'local.yml') - return config_file - -def switch(dispatch_type, dispatch_name): - '''Change dispatch in config.yml''' - config_path = get_config_file_path() - experiment_config = get_yml_content(config_path) - if dispatch_name in ['GridSearch', 'BatchTuner', 'Random']: - experiment_config[dispatch_type.lower()] = { - 'builtin' + dispatch_type + 'Name': dispatch_name - } - else: - experiment_config[dispatch_type.lower()] = { - 'builtin' + dispatch_type + 'Name': dispatch_name, - 'classArgs': { - 'optimize_mode': 'maximize' - } - } - if dispatch_name == 'BatchTuner': - experiment_config['searchSpacePath'] = 'batchtuner_search_space.json' - else: - experiment_config['searchSpacePath'] = 'search_space.json' - dump_yml_content(config_path, experiment_config) - 
-def test_builtin_dispatcher(dispatch_type, dispatch_name): - '''test a dispatcher whose type is dispatch_type and name is dispatch_name''' - switch(dispatch_type, dispatch_name) - - print('Testing %s...' % dispatch_name) - proc = subprocess.run(['nnictl', 'create', '--config', get_config_file_path()]) - assert proc.returncode == 0, '`nnictl create` failed with code %d' % proc.returncode - - nnimanager_log_path = get_nni_log_path(EXPERIMENT_URL) - - for _ in range(20): - time.sleep(3) - # check if experiment is done - experiment_status = is_experiment_done(nnimanager_log_path) - if experiment_status: - break - - assert experiment_status, 'Failed to finish in 1 min' - -def run(dispatch_type): - '''test all dispatchers whose type is dispatch_type''' - assert dispatch_type in ['Tuner', 'Assessor'], 'Unsupported dispatcher type: %s' % (dispatch_type) - dipsatcher_list = TUNER_LIST if dispatch_type == 'Tuner' else ASSESSOR_LIST - for dispatcher_name in dipsatcher_list: - try: - # Sleep here to make sure previous stopped exp has enough time to exit to avoid port conflict - time.sleep(6) - test_builtin_dispatcher(dispatch_type, dispatcher_name) - print(GREEN + 'Test %s %s: TEST PASS' % (dispatcher_name, dispatch_type) + CLEAR) - except Exception as error: - print(RED + 'Test %s %s: TEST FAIL' % (dispatcher_name, dispatch_type) + CLEAR) - print('%r' % error) - traceback.print_exc() - raise error - finally: - subprocess.run(['nnictl', 'stop']) - -if __name__ == '__main__': - installed = (sys.argv[-1] != '--preinstall') - setup_experiment(installed) - - run('Tuner') - run('Assessor') diff --git a/test/nni_test/nnitest/utils.py b/test/nni_test/nnitest/utils.py index 31c2427d78..e16b4dc949 100644 --- a/test/nni_test/nnitest/utils.py +++ b/test/nni_test/nnitest/utils.py @@ -17,11 +17,12 @@ RED = '\33[31m' CLEAR = '\33[0m' -REST_ENDPOINT = 'http://localhost:8080/api/v1/nni' -EXPERIMENT_URL = REST_ENDPOINT + '/experiment' -STATUS_URL = REST_ENDPOINT + '/check-status' 
-TRIAL_JOBS_URL = REST_ENDPOINT + '/trial-jobs' -METRICS_URL = REST_ENDPOINT + '/metric-data' +REST_ENDPOINT = 'http://localhost:8080' +API_ROOT_URL = REST_ENDPOINT + '/api/v1/nni' +EXPERIMENT_URL = API_ROOT_URL + '/experiment' +STATUS_URL = API_ROOT_URL + '/check-status' +TRIAL_JOBS_URL = API_ROOT_URL + '/trial-jobs' +METRICS_URL = API_ROOT_URL + '/metric-data' def read_last_line(file_name): '''read last line of a file and return None if file not found''' diff --git a/test/nni_test/nnitest/validators.py b/test/nni_test/nnitest/validators.py index efeaf2446c..1032720747 100644 --- a/test/nni_test/nnitest/validators.py +++ b/test/nni_test/nnitest/validators.py @@ -4,17 +4,17 @@ import os.path as osp import json import requests -from utils import GREEN, RED, CLEAR, STATUS_URL, TRIAL_JOBS_URL, METRICS_URL +import nnicli as nc +from utils import METRICS_URL class ITValidator: - def __call__(self, api_root_url, experiment_dir, nni_source_dir): + def __call__(self, rest_endpoint, experiment_dir, nni_source_dir): pass class MetricsValidator(ITValidator): - def __call__(self, api_root_url, experiment_dir, nni_source_dir): - #print('VALIDATOR CALLED!!!') + def __call__(self, rest_endpoint, experiment_dir, nni_source_dir): self.check_metrics(nni_source_dir) def check_metrics(self, nni_source_dir): @@ -50,3 +50,12 @@ def get_metric_results(self, metrics): else: final_result[metric['trialJobId']] = [metric_value] return intermediate_result, final_result + +class NnicliValidator(ITValidator): + def __call__(self, rest_endpoint, experiment_dir, nni_source_dir): + print(rest_endpoint) + nc.set_endpoint(rest_endpoint) + #print(nc.version()) + print(nc.get_job_statistics()) + print(nc.get_experiment_status()) + print(nc.list_trial_jobs()) diff --git a/test/pipelines/pipelines-it-local-windows.yml b/test/pipelines/pipelines-it-local-windows.yml index 3fe4d7f48d..e01e7c1b23 100644 --- a/test/pipelines/pipelines-it-local-windows.yml +++ b/test/pipelines/pipelines-it-local-windows.yml 
@@ -9,7 +9,7 @@ jobs: - script: | python -m pip install scikit-learn==0.20.0 --user python -m pip install keras==2.1.6 --user - python -m pip install torch===1.2.0 torchvision===0.4.1 -f https://download.pytorch.org/whl/torch_stable.html --user + python -m pip install torch===1.3.1 torchvision===0.4.1 -f https://download.pytorch.org/whl/torch_stable.html --user python -m pip install tensorflow-gpu==1.11.0 --user displayName: 'Install dependencies for integration tests' - script: | diff --git a/test/pipelines/pipelines-it-local.yml b/test/pipelines/pipelines-it-local.yml index b3dca9243c..00fb26169f 100644 --- a/test/pipelines/pipelines-it-local.yml +++ b/test/pipelines/pipelines-it-local.yml @@ -10,8 +10,8 @@ jobs: displayName: 'Install nni toolkit via source code' - script: | python3 -m pip install scikit-learn==0.20.0 --user - python3 -m pip install torch==1.2.0 --user - python3 -m pip install torchvision==0.4.0 --user + python3 -m pip install torch==1.3.1 --user + python3 -m pip install torchvision==0.4.1 --user python3 -m pip install keras==2.1.6 --user python3 -m pip install tensorflow-gpu==1.15 --user sudo apt-get install swig -y From faf3e192b5f346e05cef045041210b4ca491ddb5 Mon Sep 17 00:00:00 2001 From: chicm-ms <38930155+chicm-ms@users.noreply.github.com> Date: Wed, 18 Mar 2020 23:39:29 +0800 Subject: [PATCH 10/38] Fix trialkeeper flush (#2174) (#2177) Co-authored-by: SparkSnail --- tools/nni_trial_tool/log_utils.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/nni_trial_tool/log_utils.py b/tools/nni_trial_tool/log_utils.py index 59261d2950..20c8e74c09 100644 --- a/tools/nni_trial_tool/log_utils.py +++ b/tools/nni_trial_tool/log_utils.py @@ -85,6 +85,13 @@ def get_pipelog_reader(self): ''' return PipeLogReader(self.logger, self.log_collection, logging.INFO) + def flush(self): + ''' + Add flush in handler + ''' + for handler in self.logger.handlers: + handler.flush() + def write(self, buf): ''' Write buffer data into logger/stdout From 
02bd88e422e76b60763ce06e609755f3c80f6817 Mon Sep 17 00:00:00 2001 From: chicm-ms <38930155+chicm-ms@users.noreply.github.com> Date: Thu, 19 Mar 2020 00:15:25 +0800 Subject: [PATCH 11/38] Fix torch version (#2178) --- test/pipelines/pipelines-it-local-windows.yml | 2 +- test/pipelines/pipelines-it-local.yml | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/test/pipelines/pipelines-it-local-windows.yml b/test/pipelines/pipelines-it-local-windows.yml index e01e7c1b23..a63165e719 100644 --- a/test/pipelines/pipelines-it-local-windows.yml +++ b/test/pipelines/pipelines-it-local-windows.yml @@ -9,7 +9,7 @@ jobs: - script: | python -m pip install scikit-learn==0.20.0 --user python -m pip install keras==2.1.6 --user - python -m pip install torch===1.3.1 torchvision===0.4.1 -f https://download.pytorch.org/whl/torch_stable.html --user + python -m pip install torch===1.13.1 torchvision===0.4.1 -f https://download.pytorch.org/whl/torch_stable.html --user python -m pip install tensorflow-gpu==1.11.0 --user displayName: 'Install dependencies for integration tests' - script: | diff --git a/test/pipelines/pipelines-it-local.yml b/test/pipelines/pipelines-it-local.yml index 00fb26169f..7708285ec3 100644 --- a/test/pipelines/pipelines-it-local.yml +++ b/test/pipelines/pipelines-it-local.yml @@ -10,7 +10,7 @@ jobs: displayName: 'Install nni toolkit via source code' - script: | python3 -m pip install scikit-learn==0.20.0 --user - python3 -m pip install torch==1.3.1 --user + python3 -m pip install torch==1.13.1 --user python3 -m pip install torchvision==0.4.1 --user python3 -m pip install keras==2.1.6 --user python3 -m pip install tensorflow-gpu==1.15 --user @@ -26,11 +26,11 @@ jobs: cd test PATH=$HOME/.local/bin:$PATH python3 nni_test/nnitest/naive_test.py displayName: 'Naive test' - - script: | - cd test - PATH=$HOME/.local/bin:$PATH python3 nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts local - displayName: 'Integration test' - 
script: | cd test source scripts/model_compression.sh displayName: 'Model compression test' + - script: | + cd test + PATH=$HOME/.local/bin:$PATH python3 nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts local + displayName: 'Integration test' From 4d0f0322ecfce548cb7473e1356898e5877cfd1f Mon Sep 17 00:00:00 2001 From: chicm-ms <38930155+chicm-ms@users.noreply.github.com> Date: Thu, 19 Mar 2020 00:39:01 +0800 Subject: [PATCH 12/38] fix torch version (#2179) --- test/pipelines/pipelines-it-local-windows.yml | 2 +- test/pipelines/pipelines-it-local.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/pipelines/pipelines-it-local-windows.yml b/test/pipelines/pipelines-it-local-windows.yml index a63165e719..7691ad19fe 100644 --- a/test/pipelines/pipelines-it-local-windows.yml +++ b/test/pipelines/pipelines-it-local-windows.yml @@ -9,7 +9,7 @@ jobs: - script: | python -m pip install scikit-learn==0.20.0 --user python -m pip install keras==2.1.6 --user - python -m pip install torch===1.13.1 torchvision===0.4.1 -f https://download.pytorch.org/whl/torch_stable.html --user + python -m pip install torchvision===0.4.1 torch===1.3.1 -f https://download.pytorch.org/whl/torch_stable.html --user python -m pip install tensorflow-gpu==1.11.0 --user displayName: 'Install dependencies for integration tests' - script: | diff --git a/test/pipelines/pipelines-it-local.yml b/test/pipelines/pipelines-it-local.yml index 7708285ec3..b0d385f630 100644 --- a/test/pipelines/pipelines-it-local.yml +++ b/test/pipelines/pipelines-it-local.yml @@ -10,8 +10,8 @@ jobs: displayName: 'Install nni toolkit via source code' - script: | python3 -m pip install scikit-learn==0.20.0 --user - python3 -m pip install torch==1.13.1 --user python3 -m pip install torchvision==0.4.1 --user + python3 -m pip install torch==1.3.1 --user python3 -m pip install keras==2.1.6 --user python3 -m pip install tensorflow-gpu==1.15 --user sudo apt-get install swig -y From 
4fab5b6b272ffedb668c22b922991db59cf7d263 Mon Sep 17 00:00:00 2001 From: chicm-ms <38930155+chicm-ms@users.noreply.github.com> Date: Thu, 19 Mar 2020 01:21:51 +0800 Subject: [PATCH 13/38] Fix nnictl view test (#2180) --- test/config/integration_tests.yml | 5 +++++ test/nni_test/nnitest/run_tests.py | 3 +++ 2 files changed, 8 insertions(+) diff --git a/test/config/integration_tests.yml b/test/config/integration_tests.yml index 9d52bee66a..7e0e80ce60 100644 --- a/test/config/integration_tests.yml +++ b/test/config/integration_tests.yml @@ -2,6 +2,7 @@ defaultTestCaseConfig: launchCommand: nnictl create --config $configFile stopCommand: nnictl stop + experimentStatusCheck: True testCases: ####################################################################### @@ -28,6 +29,9 @@ testCases: # set experiment ID into variable, variable name should start with $, such as $expId setExperimentIdtoVar: $expId + # check status of experiment before calling validator + experimentStatusCheck: True + - name: sklearn-regression configFile: test/config/examples/sklearn-regression.yml @@ -92,6 +96,7 @@ testCases: - name: nnictl-view configFile: test/config/examples/sklearn-regression.yml launchCommand: nnictl view $resumeExpId + experimentStatusCheck: False - name: multi-thread configFile: test/config/multi_thread/config.yml diff --git a/test/nni_test/nnitest/run_tests.py b/test/nni_test/nnitest/run_tests.py index d387f814a2..da845414fa 100644 --- a/test/nni_test/nnitest/run_tests.py +++ b/test/nni_test/nnitest/run_tests.py @@ -113,6 +113,9 @@ def launch_test(config_file, training_service, test_case_config): max_duration, max_trial_num = get_max_values(config_file) sleep_interval = 3 + if not test_case_config.get('experimentStatusCheck'): + return + for _ in range(0, max_duration+10, sleep_interval): time.sleep(sleep_interval) status = get_experiment_status(STATUS_URL) From b9eba866f0b4c44d53d3980ca9e05f813221a460 Mon Sep 17 00:00:00 2001 From: chicm-ms 
<38930155+chicm-ms@users.noreply.github.com> Date: Thu, 19 Mar 2020 01:30:12 +0800 Subject: [PATCH 14/38] Fix model compression example (#2181) --- examples/model_compress/model_prune_torch.py | 2 +- test/pipelines/pipelines-it-local.yml | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/examples/model_compress/model_prune_torch.py b/examples/model_compress/model_prune_torch.py index fb64ecf351..d3eac153a7 100644 --- a/examples/model_compress/model_prune_torch.py +++ b/examples/model_compress/model_prune_torch.py @@ -67,7 +67,7 @@ 'dataset_name': 'cifar10', 'model_name': 'vgg16', 'pruner_class': ActivationMeanRankFilterPruner, - 'configure_list': [{ + 'config_list': [{ 'sparsity': 0.5, 'op_types': ['default'], 'op_names': ['feature.0', 'feature.24', 'feature.27', 'feature.30', 'feature.34', 'feature.37'] diff --git a/test/pipelines/pipelines-it-local.yml b/test/pipelines/pipelines-it-local.yml index b0d385f630..e4ab5e39e6 100644 --- a/test/pipelines/pipelines-it-local.yml +++ b/test/pipelines/pipelines-it-local.yml @@ -26,11 +26,11 @@ jobs: cd test PATH=$HOME/.local/bin:$PATH python3 nni_test/nnitest/naive_test.py displayName: 'Naive test' - - script: | - cd test - source scripts/model_compression.sh - displayName: 'Model compression test' - script: | cd test PATH=$HOME/.local/bin:$PATH python3 nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts local displayName: 'Integration test' + - script: | + cd test + source scripts/model_compression.sh + displayName: 'Model compression test' From 0c6c5afdcfd5e50a6ef12921aba60fb2b4085a9b Mon Sep 17 00:00:00 2001 From: chicm-ms <38930155+chicm-ms@users.noreply.github.com> Date: Thu, 19 Mar 2020 02:48:43 +0800 Subject: [PATCH 15/38] fix remote pipeline (#2182) --- test/config/assessors/trial.py | 2 +- test/pipelines/pipelines-it-remote-windows.yml | 2 +- test/pipelines/pipelines-it-remote.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git 
a/test/config/assessors/trial.py b/test/config/assessors/trial.py index 20cbd89f5a..e4bdee0fd9 100644 --- a/test/config/assessors/trial.py +++ b/test/config/assessors/trial.py @@ -15,7 +15,7 @@ nni.get_next_parameter() for i in range(20): time.sleep(1) - for _ in range(5): + for _ in range(2): if up: v *= 1.1 else: diff --git a/test/pipelines/pipelines-it-remote-windows.yml b/test/pipelines/pipelines-it-remote-windows.yml index 481a31353e..73f5950ba4 100644 --- a/test/pipelines/pipelines-it-remote-windows.yml +++ b/test/pipelines/pipelines-it-remote-windows.yml @@ -50,7 +50,7 @@ jobs: inputs: sshEndpoint: $(end_point) runOptions: commands - commands: python3 /tmp/nnitest/$(Build.BuildId)/nni-remote/test/remote_docker.py --mode stop --name $(Build.BuildId) --os windows + commands: python3 /tmp/nnitest/$(Build.BuildId)/nni-remote/test/nni_test/nnitest/remote_docker.py --mode stop --name $(Build.BuildId) --os windows displayName: 'Stop docker' - task: SSH@0 inputs: diff --git a/test/pipelines/pipelines-it-remote.yml b/test/pipelines/pipelines-it-remote.yml index c8abae058f..4eab1cf650 100644 --- a/test/pipelines/pipelines-it-remote.yml +++ b/test/pipelines/pipelines-it-remote.yml @@ -62,5 +62,5 @@ jobs: inputs: sshEndpoint: $(end_point) runOptions: commands - commands: python3 /tmp/nnitest/$(Build.BuildId)/test/remote_docker.py --mode stop --name $(Build.BuildId) + commands: python3 /tmp/nnitest/$(Build.BuildId)/test/nni_test/nnitest/remote_docker.py --mode stop --name $(Build.BuildId) displayName: 'Stop docker' From d5d517c2e970f8e0785c4b8bd3c56097237e276f Mon Sep 17 00:00:00 2001 From: chicm-ms <38930155+chicm-ms@users.noreply.github.com> Date: Thu, 19 Mar 2020 14:53:11 +0800 Subject: [PATCH 16/38] Add dict metrics test (#2186) --- test/config/integration_tests.yml | 22 +++++++-- .../{metrics.test.yml => config.yml} | 0 ...win32.test.yml => config_dict_metrics.yml} | 2 +- .../metrics_test/expected_metrics_dict.json | 11 +++++ test/config/metrics_test/trial.py | 28 
++++++++---- test/nni_test/nnitest/run_tests.py | 45 ++++++++++++------- test/nni_test/nnitest/validators.py | 22 +++++---- test/scripts/model_compression.sh | 5 +-- 8 files changed, 94 insertions(+), 41 deletions(-) rename test/config/metrics_test/{metrics.test.yml => config.yml} (100%) rename test/config/metrics_test/{metrics_win32.test.yml => config_dict_metrics.yml} (87%) create mode 100644 test/config/metrics_test/expected_metrics_dict.json diff --git a/test/config/integration_tests.yml b/test/config/integration_tests.yml index 7e0e80ce60..31359b1e84 100644 --- a/test/config/integration_tests.yml +++ b/test/config/integration_tests.yml @@ -66,12 +66,25 @@ testCases: ######################################################################### # nni features test ######################################################################### -- name: metrics_test - configFile: test/config/metrics_test/metrics.test.yml +- name: metrics_float + configFile: test/config/metrics_test/config.yml config: maxTrialNum: 1 trialConcurrency: 1 - validator: MetricsValidator + validator: + class: MetricsValidator + kwargs: + expected_result_file: expected_metrics.json + +- name: metrics_dict + configFile: test/config/metrics_test/config_dict_metrics.yml + config: + maxTrialNum: 1 + trialConcurrency: 1 + validator: + class: MetricsValidator + kwargs: + expected_result_file: expected_metrics_dict.json - name: nnicli configFile: test/config/examples/sklearn-regression.yml @@ -80,7 +93,8 @@ testCases: trialConcurrency: 4 launchCommand: python3 -c 'import nnicli as nc; nc.start_nni("$configFile")' stopCommand: python3 -c 'import nnicli as nc; nc.stop_nni()' - validator: NnicliValidator + validator: + class: NnicliValidator # Experiment resume test part 1 - name: nnictl-resume-1 diff --git a/test/config/metrics_test/metrics.test.yml b/test/config/metrics_test/config.yml similarity index 100% rename from test/config/metrics_test/metrics.test.yml rename to test/config/metrics_test/config.yml 
diff --git a/test/config/metrics_test/metrics_win32.test.yml b/test/config/metrics_test/config_dict_metrics.yml similarity index 87% rename from test/config/metrics_test/metrics_win32.test.yml rename to test/config/metrics_test/config_dict_metrics.yml index 39dfb662e8..286363dffe 100644 --- a/test/config/metrics_test/metrics_win32.test.yml +++ b/test/config/metrics_test/config_dict_metrics.yml @@ -10,7 +10,7 @@ tuner: trial: codeDir: . - command: python trial.py + command: python3 trial.py --dict_metrics gpuNum: 0 useAnnotation: false diff --git a/test/config/metrics_test/expected_metrics_dict.json b/test/config/metrics_test/expected_metrics_dict.json new file mode 100644 index 0000000000..c3d57f88af --- /dev/null +++ b/test/config/metrics_test/expected_metrics_dict.json @@ -0,0 +1,11 @@ +{ + "intermediate_result": [ + {"default": 0.1, "loss": 0.11, "other": 0.111}, + {"default": 0.2, "loss": 0.22, "other": 0.222}, + {"default": 0.3, "loss": 0.33, "other": 0.333}, + {"default": 0.4, "loss": 0.44, "other": 0.444}, + {"default": 0.5, "loss": 0.55, "other": 0.555} + + ], + "final_result": {"default": 0.6, "loss": 0.66, "other": 0.666} +} diff --git a/test/config/metrics_test/trial.py b/test/config/metrics_test/trial.py index d9a61372ab..5c12afef44 100644 --- a/test/config/metrics_test/trial.py +++ b/test/config/metrics_test/trial.py @@ -2,18 +2,28 @@ # Licensed under the MIT license. 
import time +import json +import argparse import nni if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument("--dict_metrics", action='store_true') + args = parser.parse_args() + + if args.dict_metrics: + result_file = 'expected_metrics_dict.json' + else: + result_file = 'expected_metrics.json' + nni.get_next_parameter() + with open(result_file, 'r') as f: + m = json.load(f) + for v in m['intermediate_result']: + time.sleep(1) + print('report_intermediate_result:', v) + nni.report_intermediate_result(v) time.sleep(1) - for i in range(10): - if i % 2 == 0: - print('report intermediate result without end of line.', end='') - else: - print('report intermediate result.') - nni.report_intermediate_result(0.1*(i+1)) - time.sleep(2) - print('test final metrics not at line start.', end='') - nni.report_final_result(1.0) + print('report_final_result:', m['final_result']) + nni.report_final_result(m['final_result']) print('done') diff --git a/test/nni_test/nnitest/run_tests.py b/test/nni_test/nnitest/run_tests.py index da845414fa..56055f7653 100644 --- a/test/nni_test/nnitest/run_tests.py +++ b/test/nni_test/nnitest/run_tests.py @@ -37,19 +37,10 @@ def update_training_service_config(config, training_service): deep_update(config, it_ts_config['all']) deep_update(config, it_ts_config[training_service]) -def run_test_case(test_case_config, it_config, args): - # fill test case default config - for k in it_config['defaultTestCaseConfig']: - if k not in test_case_config: - test_case_config[k] = it_config['defaultTestCaseConfig'][k] - print(json.dumps(test_case_config, indent=4)) - +def prepare_config_file(test_case_config, it_config, args): config_path = os.path.join(args.nni_source_dir, test_case_config['configFile']) test_yml_config = get_yml_content(config_path) - # apply training service config - update_training_service_config(test_yml_config, args.ts) - # apply test case specific config if test_case_config.get('config') is not None: 
deep_update(test_yml_config, test_case_config['config']) @@ -58,21 +49,32 @@ def run_test_case(test_case_config, it_config, args): if sys.platform == 'win32': test_yml_config['trial']['command'] = test_yml_config['trial']['command'].replace('python3', 'python') + # apply training service config + # user's gpuNum, logCollection config is overwritten by the config in training_service.yml + # the hack for kubeflow should be applied at last step + update_training_service_config(test_yml_config, args.ts) + # generate temporary config yml file to launch experiment new_config_file = config_path + '.tmp' dump_yml_content(new_config_file, test_yml_config) print(yaml.dump(test_yml_config, default_flow_style=False)) + return new_config_file + +def run_test_case(test_case_config, it_config, args): + # fill test case default config + for k in it_config['defaultTestCaseConfig']: + if k not in test_case_config: + test_case_config[k] = it_config['defaultTestCaseConfig'][k] + print(json.dumps(test_case_config, indent=4)) + + new_config_file = prepare_config_file(test_case_config, it_config, args) # set configFile variable it_variables['$configFile'] = new_config_file try: launch_test(new_config_file, args.ts, test_case_config) - - validator_name = test_case_config.get('validator') - if validator_name is not None: - validator = validators.__dict__[validator_name]() - validator(REST_ENDPOINT, None, args.nni_source_dir) + invoke_validator(test_case_config, args.nni_source_dir) finally: print('Stop command:', test_case_config.get('stopCommand')) if test_case_config.get('stopCommand'): @@ -81,6 +83,16 @@ def run_test_case(test_case_config, it_config, args): if os.path.exists(new_config_file): os.remove(new_config_file) +def invoke_validator(test_case_config, nni_source_dir): + validator_config = test_case_config.get('validator') + if validator_config is None or validator_config.get('class') is None: + return + + validator = validators.__dict__[validator_config.get('class')]() + kwargs = 
validator_config.get('kwargs', {}) + print('kwargs:', kwargs) + validator(REST_ENDPOINT, None, nni_source_dir, **kwargs) + def get_max_values(config_file): '''Get maxExecDuration and maxTrialNum of experiment''' experiment_config = get_yml_content(config_file) @@ -152,7 +164,8 @@ def run(args): begin_time = time.time() run_test_case(test_case_config, it_config, args) - print(GREEN + 'Test %s: TEST PASS IN %d mins' % (name, (time.time() - begin_time)/60) + CLEAR) + print('{}Test {}: TEST PASS IN {} SECONDS{}'.format(GREEN, name, int(time.time()-begin_time), CLEAR), flush=True) + if __name__ == '__main__': parser = argparse.ArgumentParser() diff --git a/test/nni_test/nnitest/validators.py b/test/nni_test/nnitest/validators.py index 1032720747..2fc43abe89 100644 --- a/test/nni_test/nnitest/validators.py +++ b/test/nni_test/nnitest/validators.py @@ -9,19 +9,21 @@ class ITValidator: - def __call__(self, rest_endpoint, experiment_dir, nni_source_dir): + def __call__(self, rest_endpoint, experiment_dir, nni_source_dir, **kwargs): pass class MetricsValidator(ITValidator): - def __call__(self, rest_endpoint, experiment_dir, nni_source_dir): - self.check_metrics(nni_source_dir) + def __call__(self, rest_endpoint, experiment_dir, nni_source_dir, **kwargs): + self.check_metrics(nni_source_dir, **kwargs) - def check_metrics(self, nni_source_dir): - with open(osp.join(nni_source_dir, 'test', 'config', 'metrics_test', 'expected_metrics.json'), 'r') as f: + def check_metrics(self, nni_source_dir, **kwargs): + expected_result_file = kwargs.get('expected_result_file', 'expected_metrics.json') + with open(osp.join(nni_source_dir, 'test', 'config', 'metrics_test', expected_result_file), 'r') as f: expected_metrics = json.load(f) print('expected metrics:', expected_metrics) metrics = requests.get(METRICS_URL).json() + print('RAW METRICS:', json.dumps(metrics, indent=4)) intermediate_result, final_result = self.get_metric_results(metrics) assert intermediate_result and final_result @@ 
-32,13 +34,17 @@ def check_metrics(self, nni_source_dir): print('final result:', trial_final_result) assert len(trial_final_result) == 1, 'there should be 1 final result' assert trial_final_result[0] == expected_metrics['final_result'] - assert set(trial_intermediate_result) == set(expected_metrics['intermediate_result']) + # encode dict/number into json string to compare them in set + assert set([json.dumps(x) for x in trial_intermediate_result]) \ + == set([json.dumps(x) for x in expected_metrics['intermediate_result']]) def get_metric_results(self, metrics): intermediate_result = {} final_result = {} for metric in metrics: - metric_value = round(float(json.loads(metric['data'])), 2) + # metrics value are encoded by NNI SDK as json string, + # here we decode the value by json.loads twice + metric_value = json.loads(json.loads(metric['data'])) if metric['type'] == 'PERIODICAL': if metric['trialJobId'] in intermediate_result: intermediate_result[metric['trialJobId']].append(metric_value) @@ -52,7 +58,7 @@ def get_metric_results(self, metrics): return intermediate_result, final_result class NnicliValidator(ITValidator): - def __call__(self, rest_endpoint, experiment_dir, nni_source_dir): + def __call__(self, rest_endpoint, experiment_dir, nni_source_dir, **kwargs): print(rest_endpoint) nc.set_endpoint(rest_endpoint) #print(nc.version()) diff --git a/test/scripts/model_compression.sh b/test/scripts/model_compression.sh index ade720f86c..b1051f69e4 100644 --- a/test/scripts/model_compression.sh +++ b/test/scripts/model_compression.sh @@ -27,12 +27,11 @@ do python3 model_prune_torch.py --pruner_name $name --pretrain_epochs 1 --prune_epochs 1 done -echo "testing lottery ticket pruning..." -python3 lottery_torch_mnist_fc.py +#echo "testing lottery ticket pruning..." +#python3 lottery_torch_mnist_fc.py echo "" echo "===========================Testing: quantizers===========================" -cd ${CWD}/../examples/model_compress echo "testing QAT quantizer..." 
python3 QAT_torch_quantizer.py From 2fad4eba3434585eeb9ad1e93bd8cc2644319717 Mon Sep 17 00:00:00 2001 From: chicm-ms <38930155+chicm-ms@users.noreply.github.com> Date: Thu, 19 Mar 2020 15:38:15 +0800 Subject: [PATCH 17/38] add test timestamp (#2188) --- test/nni_test/nnitest/run_tests.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/test/nni_test/nnitest/run_tests.py b/test/nni_test/nnitest/run_tests.py index 56055f7653..b150f4dadf 100644 --- a/test/nni_test/nnitest/run_tests.py +++ b/test/nni_test/nnitest/run_tests.py @@ -6,6 +6,7 @@ import argparse import subprocess import time +import datetime import shlex import traceback import json @@ -123,16 +124,22 @@ def launch_test(config_file, training_service, test_case_config): print('variables:', it_variables) max_duration, max_trial_num = get_max_values(config_file) - sleep_interval = 3 + print('max_duration:', max_duration, ' max_trial_num:', max_trial_num) if not test_case_config.get('experimentStatusCheck'): return - for _ in range(0, max_duration+10, sleep_interval): - time.sleep(sleep_interval) + bg_time = time.time() + print(str(datetime.datetime.now()), ' waiting ...') + while True: + time.sleep(3) + if time.time() - bg_time > max_duration+10: + break status = get_experiment_status(STATUS_URL) if status in ['DONE', 'ERROR'] or get_failed_trial_jobs(TRIAL_JOBS_URL): break + print(str(datetime.datetime.now()), ' waiting done') + trial_stats = get_trial_stats(TRIAL_JOBS_URL) print(json.dumps(trial_stats, indent=4), flush=True) if status != 'DONE' or trial_stats['SUCCEEDED'] + trial_stats['EARLY_STOPPED'] < max_trial_num: From e9865b175011bd6c88ad681f343aba490f8eff78 Mon Sep 17 00:00:00 2001 From: chicm-ms <38930155+chicm-ms@users.noreply.github.com> Date: Thu, 19 Mar 2020 15:46:02 +0800 Subject: [PATCH 18/38] disable dict metrics test (#2189) --- test/config/integration_tests.yml | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git 
a/test/config/integration_tests.yml b/test/config/integration_tests.yml index 31359b1e84..c310250d91 100644 --- a/test/config/integration_tests.yml +++ b/test/config/integration_tests.yml @@ -76,15 +76,16 @@ testCases: kwargs: expected_result_file: expected_metrics.json -- name: metrics_dict - configFile: test/config/metrics_test/config_dict_metrics.yml - config: - maxTrialNum: 1 - trialConcurrency: 1 - validator: - class: MetricsValidator - kwargs: - expected_result_file: expected_metrics_dict.json +# to be enabled +#- name: metrics_dict +# configFile: test/config/metrics_test/config_dict_metrics.yml +# config: +# maxTrialNum: 1 +# trialConcurrency: 1 +# validator: +# class: MetricsValidator +# kwargs: +# expected_result_file: expected_metrics_dict.json - name: nnicli configFile: test/config/examples/sklearn-regression.yml From cdf78f418b3baf828a6461c90c4dfa812fd62764 Mon Sep 17 00:00:00 2001 From: chicm-ms <38930155+chicm-ms@users.noreply.github.com> Date: Thu, 19 Mar 2020 16:41:45 +0800 Subject: [PATCH 19/38] Fix remote windows (#2191) --- test/nni_test/nnitest/run_tests.py | 8 ++++---- test/scripts/model_compression.sh | 14 +++++++------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/test/nni_test/nnitest/run_tests.py b/test/nni_test/nnitest/run_tests.py index b150f4dadf..07de56f836 100644 --- a/test/nni_test/nnitest/run_tests.py +++ b/test/nni_test/nnitest/run_tests.py @@ -47,7 +47,7 @@ def prepare_config_file(test_case_config, it_config, args): deep_update(test_yml_config, test_case_config['config']) # hack for windows - if sys.platform == 'win32': + if sys.platform == 'win32' and args.ts == 'local': test_yml_config['trial']['command'] = test_yml_config['trial']['command'].replace('python3', 'python') # apply training service config @@ -58,7 +58,7 @@ def prepare_config_file(test_case_config, it_config, args): # generate temporary config yml file to launch experiment new_config_file = config_path + '.tmp' dump_yml_content(new_config_file, 
test_yml_config) - print(yaml.dump(test_yml_config, default_flow_style=False)) + print(yaml.dump(test_yml_config, default_flow_style=False), flush=True) return new_config_file @@ -130,7 +130,7 @@ def launch_test(config_file, training_service, test_case_config): return bg_time = time.time() - print(str(datetime.datetime.now()), ' waiting ...') + print(str(datetime.datetime.now()), ' waiting ...', flush=True) while True: time.sleep(3) if time.time() - bg_time > max_duration+10: @@ -138,7 +138,7 @@ def launch_test(config_file, training_service, test_case_config): status = get_experiment_status(STATUS_URL) if status in ['DONE', 'ERROR'] or get_failed_trial_jobs(TRIAL_JOBS_URL): break - print(str(datetime.datetime.now()), ' waiting done') + print(str(datetime.datetime.now()), ' waiting done', flush=True) trial_stats = get_trial_stats(TRIAL_JOBS_URL) print(json.dumps(trial_stats, indent=4), flush=True) diff --git a/test/scripts/model_compression.sh b/test/scripts/model_compression.sh index b1051f69e4..969b100e94 100644 --- a/test/scripts/model_compression.sh +++ b/test/scripts/model_compression.sh @@ -32,12 +32,12 @@ done echo "" echo "===========================Testing: quantizers===========================" +# to be enabled +#echo "testing QAT quantizer..." +#python3 QAT_torch_quantizer.py -echo "testing QAT quantizer..." -python3 QAT_torch_quantizer.py +#echo "testing DoReFa quantizer..." +#python3 DoReFaQuantizer_torch_mnist.py -echo "testing DoReFa quantizer..." -python3 DoReFaQuantizer_torch_mnist.py - -echo "testing BNN quantizer..." -python3 BNN_quantizer_cifar10.py +#echo "testing BNN quantizer..." 
+#python3 BNN_quantizer_cifar10.py From 4df0b72b58fe855f1abeb465fe3a66632becbf16 Mon Sep 17 00:00:00 2001 From: chicm-ms <38930155+chicm-ms@users.noreply.github.com> Date: Thu, 19 Mar 2020 16:55:19 +0800 Subject: [PATCH 20/38] change kubeflow IT config (#2192) --- test/config/training_service.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test/config/training_service.yml b/test/config/training_service.yml index 040342da67..65f2676960 100644 --- a/test/config/training_service.yml +++ b/test/config/training_service.yml @@ -4,6 +4,10 @@ all: kubeflow: maxExecDuration: 15m nniManagerIp: + # use a small trial number to make IT faster + maxTrialNum: 2 + trialConcurrency: 2 + kubeflowConfig: operator: tf-operator apiVersion: v1alpha2 @@ -27,6 +31,9 @@ kubeflow: frameworkcontroller: maxExecDuration: 15m nniManagerIp: + # use a small trial number to make IT faster + maxTrialNum: 2 + trialConcurrency: 2 frameworkcontrollerConfig: serviceAccountName: frameworkbarrier storage: azureStorage From a8c9aeb714dd436daa6b626e86a798763943f8d0 Mon Sep 17 00:00:00 2001 From: chicm-ms <38930155+chicm-ms@users.noreply.github.com> Date: Thu, 19 Mar 2020 17:33:42 +0800 Subject: [PATCH 21/38] Fix windows launch command (#2193) --- test/nni_test/nnitest/run_tests.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/nni_test/nnitest/run_tests.py b/test/nni_test/nnitest/run_tests.py index 07de56f836..b2e7108692 100644 --- a/test/nni_test/nnitest/run_tests.py +++ b/test/nni_test/nnitest/run_tests.py @@ -106,6 +106,11 @@ def get_launch_command(test_case_config): # replace variables for k in it_variables: launch_command = launch_command.replace(k, it_variables[k]) + + # hack for windows, not limited to local training service + if sys.platform == 'win32': + launch_command = launch_command.replace('python3', 'python') + print('launch command: ', launch_command) return launch_command From 210057ef5e367d65411f6a8ba12e36aeb76406bb Mon Sep 17 00:00:00 2001 From: chicm-ms 
<38930155+chicm-ms@users.noreply.github.com> Date: Thu, 19 Mar 2020 17:50:23 +0800 Subject: [PATCH 22/38] Refine test case names (#2194) --- test/config/integration_tests.yml | 6 +++--- test/pipelines/pipelines-it-frameworkcontroller.yml | 2 +- test/pipelines/pipelines-it-kubeflow.yml | 2 +- test/pipelines/pipelines-it-pai-windows.yml | 2 +- test/pipelines/pipelines-it-pai.yml | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/test/config/integration_tests.yml b/test/config/integration_tests.yml index c310250d91..171b06d766 100644 --- a/test/config/integration_tests.yml +++ b/test/config/integration_tests.yml @@ -47,7 +47,7 @@ testCases: - name: mnist-annotation configFile: test/config/examples/mnist-annotation.yml -- name: cifar10_pytorch +- name: cifar10-pytorch configFile: test/config/examples/cifar10-pytorch.yml config: # this example downloads large pretrained model weights @@ -66,7 +66,7 @@ testCases: ######################################################################### # nni features test ######################################################################### -- name: metrics_float +- name: metrics-float configFile: test/config/metrics_test/config.yml config: maxTrialNum: 1 @@ -77,7 +77,7 @@ testCases: expected_result_file: expected_metrics.json # to be enabled -#- name: metrics_dict +#- name: metrics-dict # configFile: test/config/metrics_test/config_dict_metrics.yml # config: # maxTrialNum: 1 diff --git a/test/pipelines/pipelines-it-frameworkcontroller.yml b/test/pipelines/pipelines-it-frameworkcontroller.yml index 9e21054202..0bf005c21c 100644 --- a/test/pipelines/pipelines-it-frameworkcontroller.yml +++ b/test/pipelines/pipelines-it-frameworkcontroller.yml @@ -51,5 +51,5 @@ jobs: --azs_account $(azureStorage_accountName) --azs_share $(azureStorage_azureShare) --nni_docker_image $TEST_IMG --nni_manager_ip $(nni_manager_ip) cat config/training_service.yml - PATH=$HOME/.local/bin:$PATH python3 nni_test/nnitest/run_tests.py --config 
config/integration_tests.yml --ts frameworkcontroller --exclude multi_phase + PATH=$HOME/.local/bin:$PATH python3 nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts frameworkcontroller --exclude multi-phase displayName: 'integration test' diff --git a/test/pipelines/pipelines-it-kubeflow.yml b/test/pipelines/pipelines-it-kubeflow.yml index e7c38861cc..6a2f2b119f 100644 --- a/test/pipelines/pipelines-it-kubeflow.yml +++ b/test/pipelines/pipelines-it-kubeflow.yml @@ -51,5 +51,5 @@ jobs: --azs_account $(azureStorage_accountName) --azs_share $(azureStorage_azureShare) --nni_docker_image $TEST_IMG --nni_manager_ip $(nni_manager_ip) cat config/training_service.yml - PATH=$HOME/.local/bin:$PATH python3 nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts kubeflow --exclude multi_phase + PATH=$HOME/.local/bin:$PATH python3 nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts kubeflow --exclude multi-phase displayName: 'integration test' diff --git a/test/pipelines/pipelines-it-pai-windows.yml b/test/pipelines/pipelines-it-pai-windows.yml index 6aa5a24a31..606e4ed095 100644 --- a/test/pipelines/pipelines-it-pai-windows.yml +++ b/test/pipelines/pipelines-it-pai-windows.yml @@ -64,5 +64,5 @@ jobs: set PATH=$(ENV_PATH) python --version python nni_test/nnitest/generate_ts_config.py --ts pai --pai_host $(pai_host) --pai_user $(pai_user) --pai_pwd $(pai_pwd) --vc $(pai_virtual_cluster) --nni_docker_image $(docker_image) --data_dir $(data_dir) --output_dir $(output_dir) --nni_manager_ip $(nni_manager_ip) - python nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts pai --exclude multi_phase,smac,bohb + python nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts pai --exclude multi-phase,smac,bohb displayName: 'Examples and advanced features tests on pai' \ No newline at end of file diff --git a/test/pipelines/pipelines-it-pai.yml b/test/pipelines/pipelines-it-pai.yml index 
41c10e788d..43c160a81c 100644 --- a/test/pipelines/pipelines-it-pai.yml +++ b/test/pipelines/pipelines-it-pai.yml @@ -53,5 +53,5 @@ jobs: cd test python3 nni_test/nnitest/generate_ts_config.py --ts pai --pai_host $(pai_host) --pai_user $(pai_user) --nni_docker_image $TEST_IMG --pai_storage_plugin $(pai_storage_plugin)\ --pai_token $(pai_token) --nni_manager_nfs_mount_path $(nni_manager_nfs_mount_path) --container_nfs_mount_path $(container_nfs_mount_path) --nni_manager_ip $(nni_manager_ip) - PATH=$HOME/.local/bin:$PATH python3 nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts pai --exclude multi_phase + PATH=$HOME/.local/bin:$PATH python3 nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts pai --exclude multi-phase displayName: 'integration test' From 4c690a199c38b865d8bdbdd7025ad44eb65defe7 Mon Sep 17 00:00:00 2001 From: chicm-ms <38930155+chicm-ms@users.noreply.github.com> Date: Thu, 19 Mar 2020 20:27:30 +0800 Subject: [PATCH 23/38] refactor PR pipeline (#2196) --- azure-pipelines.yml | 30 +++---- .../batchtuner_search_space.json | 8 ++ test/config/tuner_naive_trial_test/local.yml | 20 +++++ .../tuner_naive_trial_test/local_win32.yml | 20 +++++ .../tuner_naive_trial_test/naive_trial.py | 10 +++ .../tuner_naive_trial_test/search_space.json | 7 ++ test/nni_test/nnitest/run_tests.py | 11 ++- test/nni_test/nnitest/tuner_test.py | 88 +++++++++++++++++++ 8 files changed, 177 insertions(+), 17 deletions(-) create mode 100644 test/config/tuner_naive_trial_test/batchtuner_search_space.json create mode 100644 test/config/tuner_naive_trial_test/local.yml create mode 100644 test/config/tuner_naive_trial_test/local_win32.yml create mode 100644 test/config/tuner_naive_trial_test/naive_trial.py create mode 100644 test/config/tuner_naive_trial_test/search_space.json create mode 100644 test/nni_test/nnitest/tuner_test.py diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 9bc6c582e6..51deec52b2 100644 --- a/azure-pipelines.yml 
+++ b/azure-pipelines.yml @@ -52,23 +52,23 @@ jobs: displayName: 'Run flake8 tests to find Python syntax errors and undefined names' - script: | cd test - source unittest.sh + source scripts/unittest.sh displayName: 'Unit test' - script: | cd test - python3 naive_test.py + python3 nni_test/nnitest/naive_test.py displayName: 'Naive test' - script: | cd test - python3 tuner_test.py - displayName: 'Built-in tuners / assessors tests' + python3 nni_test/nnitest/tuner_test.py + displayName: 'Built-in tuners / assessors with naive trial tests' - script: | cd test - python3 metrics_test.py + python3 nni_test/nnitest/run_tests.py --config config/integration_tests.yml --cases metrics displayName: 'Trial job metrics test' - script: | cd test - python3 cli_test.py + python3 nni_test/nnitest/run_tests.py --config config/integration_tests.yml --cases nnicli displayName: 'nnicli test' - script: | cd docs/en_US/ @@ -101,19 +101,19 @@ jobs: displayName: 'Install dependencies' - script: | cd test - source unittest.sh + source scripts/unittest.sh displayName: 'Unit test' - script: | cd test - python3 naive_test.py + python3 nni_test/nnitest/naive_test.py displayName: 'Naive test' - script: | cd test - python3 tuner_test.py - displayName: 'Built-in tuners / assessors tests' + python3 nni_test/nnitest/tuner_test.py + displayName: 'Built-in tuners / assessors with naive trial tests' - script: | cd test - python3 cli_test.py + python3 nni_test/nnitest/run_tests.py --config config/integration_tests.yml --cases nnicli displayName: 'nnicli test' - job: 'basic_test_pr_Windows' @@ -137,13 +137,13 @@ jobs: displayName: 'Install dependencies' - script: | cd test - powershell.exe -file unittest.ps1 + powershell.exe -file scripts/unittest.ps1 displayName: 'unit test' - script: | cd test - python tuner_test.py - displayName: 'Built-in tuners / assessors tests' + python nni_test/nnitest/tuner_test.py + displayName: 'Built-in tuners / assessors with naive trial tests' - script: | cd test - 
PATH=$HOME/.local/bin:$PATH python3 cli_test.py + python nni_test/nnitest/run_tests.py --config config/integration_tests.yml --cases nnicli displayName: 'nnicli test' diff --git a/test/config/tuner_naive_trial_test/batchtuner_search_space.json b/test/config/tuner_naive_trial_test/batchtuner_search_space.json new file mode 100644 index 0000000000..5d3beeee85 --- /dev/null +++ b/test/config/tuner_naive_trial_test/batchtuner_search_space.json @@ -0,0 +1,8 @@ +{ + "combine_params": + { + "_type" : "choice", + "_value" : [{"x": 1}, + {"x": 100}] + } +} diff --git a/test/config/tuner_naive_trial_test/local.yml b/test/config/tuner_naive_trial_test/local.yml new file mode 100644 index 0000000000..e5598914ad --- /dev/null +++ b/test/config/tuner_naive_trial_test/local.yml @@ -0,0 +1,20 @@ +assessor: + builtinAssessorName: Medianstop + classArgs: + optimize_mode: maximize +authorName: nni +experimentName: test_sdk +maxExecDuration: 1h +maxTrialNum: 2 +searchSpacePath: search_space.json +trainingServicePlatform: local +trial: + codeDir: . + command: python3 naive_trial.py + gpuNum: 0 +trialConcurrency: 2 +tuner: + builtinTunerName: Evolution + classArgs: + optimize_mode: maximize +useAnnotation: false diff --git a/test/config/tuner_naive_trial_test/local_win32.yml b/test/config/tuner_naive_trial_test/local_win32.yml new file mode 100644 index 0000000000..5cd144a789 --- /dev/null +++ b/test/config/tuner_naive_trial_test/local_win32.yml @@ -0,0 +1,20 @@ +assessor: + builtinAssessorName: Medianstop + classArgs: + optimize_mode: maximize +authorName: nni +experimentName: test_sdk +maxExecDuration: 1h +maxTrialNum: 2 +searchSpacePath: search_space.json +trainingServicePlatform: local +trial: + codeDir: . 
+ command: python naive_trial.py + gpuNum: 0 +trialConcurrency: 2 +tuner: + builtinTunerName: Evolution + classArgs: + optimize_mode: maximize +useAnnotation: false diff --git a/test/config/tuner_naive_trial_test/naive_trial.py b/test/config/tuner_naive_trial_test/naive_trial.py new file mode 100644 index 0000000000..ba71dedff2 --- /dev/null +++ b/test/config/tuner_naive_trial_test/naive_trial.py @@ -0,0 +1,10 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +import nni + +params = nni.get_next_parameter() +print('params:', params) +x = params['x'] + +nni.report_final_result(x) diff --git a/test/config/tuner_naive_trial_test/search_space.json b/test/config/tuner_naive_trial_test/search_space.json new file mode 100644 index 0000000000..f20e76e0c5 --- /dev/null +++ b/test/config/tuner_naive_trial_test/search_space.json @@ -0,0 +1,7 @@ +{ + "x": + { + "_type" : "choice", + "_value" : [1, 100] + } +} diff --git a/test/nni_test/nnitest/run_tests.py b/test/nni_test/nnitest/run_tests.py index b2e7108692..1223e07c1b 100644 --- a/test/nni_test/nnitest/run_tests.py +++ b/test/nni_test/nnitest/run_tests.py @@ -161,6 +161,13 @@ def case_excluded(name, excludes): return True return False +def case_included(name, cases): + assert cases is not None + for case in cases.split(','): + if case in name: + return True + return False + def run(args): it_config = get_yml_content(args.config) @@ -169,7 +176,7 @@ def run(args): if case_excluded(name, args.exclude): print('{} excluded'.format(name)) continue - if args.case and name and args.case not in name: + if args.cases and not case_included(name, args.cases): continue print('{}Testing: {}{}'.format(GREEN, name, CLEAR)) time.sleep(5) @@ -183,7 +190,7 @@ def run(args): parser = argparse.ArgumentParser() parser.add_argument("--config", type=str, required=True) parser.add_argument("--nni_source_dir", type=str, default='../') - parser.add_argument("--case", type=str, default=None) + 
parser.add_argument("--cases", type=str, default=None) parser.add_argument("--exclude", type=str, default=None) parser.add_argument("--ts", type=str, choices=['local', 'remote', 'pai', 'kubeflow', 'frameworkcontroller'], default='local') args = parser.parse_args() diff --git a/test/nni_test/nnitest/tuner_test.py b/test/nni_test/nnitest/tuner_test.py new file mode 100644 index 0000000000..78168f1e0b --- /dev/null +++ b/test/nni_test/nnitest/tuner_test.py @@ -0,0 +1,88 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +import sys +import os.path as osp +import subprocess +import sys +import time +import traceback + +from utils import get_yml_content, dump_yml_content, setup_experiment, get_nni_log_path, is_experiment_done +from utils import GREEN, RED, CLEAR, EXPERIMENT_URL + +TUNER_LIST = ['GridSearch', 'BatchTuner', 'TPE', 'Random', 'Anneal', 'Evolution'] +ASSESSOR_LIST = ['Medianstop'] + + +def get_config_file_path(): + if sys.platform == 'win32': + config_file = osp.join('config', 'tuner_naive_trial_test', 'local_win32.yml') + else: + config_file = osp.join('config', 'tuner_naive_trial_test', 'local.yml') + return config_file + +def switch(dispatch_type, dispatch_name): + '''Change dispatch in config.yml''' + config_path = get_config_file_path() + experiment_config = get_yml_content(config_path) + if dispatch_name in ['GridSearch', 'BatchTuner', 'Random']: + experiment_config[dispatch_type.lower()] = { + 'builtin' + dispatch_type + 'Name': dispatch_name + } + else: + experiment_config[dispatch_type.lower()] = { + 'builtin' + dispatch_type + 'Name': dispatch_name, + 'classArgs': { + 'optimize_mode': 'maximize' + } + } + if dispatch_name == 'BatchTuner': + experiment_config['searchSpacePath'] = 'batchtuner_search_space.json' + else: + experiment_config['searchSpacePath'] = 'search_space.json' + dump_yml_content(config_path, experiment_config) + +def test_builtin_dispatcher(dispatch_type, dispatch_name): + '''test a dispatcher whose 
type is dispatch_type and name is dispatch_name''' + switch(dispatch_type, dispatch_name) + + print('Testing %s...' % dispatch_name) + proc = subprocess.run(['nnictl', 'create', '--config', get_config_file_path()]) + assert proc.returncode == 0, '`nnictl create` failed with code %d' % proc.returncode + + nnimanager_log_path = get_nni_log_path(EXPERIMENT_URL) + + for _ in range(20): + time.sleep(3) + # check if experiment is done + experiment_status = is_experiment_done(nnimanager_log_path) + if experiment_status: + break + + assert experiment_status, 'Failed to finish in 1 min' + +def run(dispatch_type): + '''test all dispatchers whose type is dispatch_type''' + assert dispatch_type in ['Tuner', 'Assessor'], 'Unsupported dispatcher type: %s' % (dispatch_type) + dipsatcher_list = TUNER_LIST if dispatch_type == 'Tuner' else ASSESSOR_LIST + for dispatcher_name in dipsatcher_list: + try: + # Sleep here to make sure previous stopped exp has enough time to exit to avoid port conflict + time.sleep(6) + test_builtin_dispatcher(dispatch_type, dispatcher_name) + print(GREEN + 'Test %s %s: TEST PASS' % (dispatcher_name, dispatch_type) + CLEAR) + except Exception as error: + print(RED + 'Test %s %s: TEST FAIL' % (dispatcher_name, dispatch_type) + CLEAR) + print('%r' % error) + traceback.print_exc() + raise error + finally: + subprocess.run(['nnictl', 'stop']) + +if __name__ == '__main__': + installed = (sys.argv[-1] != '--preinstall') + setup_experiment(installed) + + run('Tuner') + run('Assessor') From df2d84a3a520707d8eb14f2d7a10e77d3a7af3db Mon Sep 17 00:00:00 2001 From: chicm-ms <38930155+chicm-ms@users.noreply.github.com> Date: Thu, 19 Mar 2020 21:07:10 +0800 Subject: [PATCH 24/38] replace stop command for win32 (#2197) --- test/nni_test/nnitest/run_tests.py | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/test/nni_test/nnitest/run_tests.py b/test/nni_test/nnitest/run_tests.py index 1223e07c1b..fba7392e19 100644 --- 
a/test/nni_test/nnitest/run_tests.py +++ b/test/nni_test/nnitest/run_tests.py @@ -77,9 +77,10 @@ def run_test_case(test_case_config, it_config, args): launch_test(new_config_file, args.ts, test_case_config) invoke_validator(test_case_config, args.nni_source_dir) finally: - print('Stop command:', test_case_config.get('stopCommand')) - if test_case_config.get('stopCommand'): - subprocess.run(shlex.split(test_case_config.get('stopCommand'))) + stop_command = get_command(test_case_config, 'stopCommand') + print('Stop command:', stop_command, flush=True) + if stop_command: + subprocess.run(shlex.split(stop_command)) # remove tmp config file if os.path.exists(new_config_file): os.remove(new_config_file) @@ -99,25 +100,29 @@ def get_max_values(config_file): experiment_config = get_yml_content(config_file) return parse_max_duration_time(experiment_config['maxExecDuration']), experiment_config['maxTrialNum'] -def get_launch_command(test_case_config): - launch_command = test_case_config.get('launchCommand') - assert launch_command is not None +def get_command(test_case_config, commandKey): + command = test_case_config.get(commandKey) + if commandKey == 'launchCommand': + assert command is not None + if command is None: + return None # replace variables for k in it_variables: - launch_command = launch_command.replace(k, it_variables[k]) + command = command.replace(k, it_variables[k]) # hack for windows, not limited to local training service if sys.platform == 'win32': - launch_command = launch_command.replace('python3', 'python') + command = command.replace('python3', 'python') - print('launch command: ', launch_command) - return launch_command + return command def launch_test(config_file, training_service, test_case_config): '''run test per configuration file''' + launch_command = get_command(test_case_config, 'launchCommand') + print('launch command: ', launch_command, flush=True) - proc = subprocess.run(shlex.split(get_launch_command(test_case_config))) + proc = 
subprocess.run(shlex.split(launch_command)) assert proc.returncode == 0, '`nnictl create` failed with code %d' % proc.returncode From 93e5eb710891aac590d64858f44bdfed9a3f6459 Mon Sep 17 00:00:00 2001 From: chicm-ms <38930155+chicm-ms@users.noreply.github.com> Date: Thu, 19 Mar 2020 21:45:03 +0800 Subject: [PATCH 25/38] exclude nnicli on win32 (#2198) --- test/pipelines/pipelines-it-local-windows.yml | 2 +- test/pipelines/pipelines-it-pai-windows.yml | 2 +- test/pipelines/pipelines-it-remote-windows.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test/pipelines/pipelines-it-local-windows.yml b/test/pipelines/pipelines-it-local-windows.yml index 7691ad19fe..8d79ab6cc0 100644 --- a/test/pipelines/pipelines-it-local-windows.yml +++ b/test/pipelines/pipelines-it-local-windows.yml @@ -22,5 +22,5 @@ jobs: displayName: 'Naive test' - script: | cd test - python nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts local --exclude smac,bohb + python nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts local --exclude smac,bohb,nnicli displayName: 'Integration tests' diff --git a/test/pipelines/pipelines-it-pai-windows.yml b/test/pipelines/pipelines-it-pai-windows.yml index 606e4ed095..39a76a768e 100644 --- a/test/pipelines/pipelines-it-pai-windows.yml +++ b/test/pipelines/pipelines-it-pai-windows.yml @@ -64,5 +64,5 @@ jobs: set PATH=$(ENV_PATH) python --version python nni_test/nnitest/generate_ts_config.py --ts pai --pai_host $(pai_host) --pai_user $(pai_user) --pai_pwd $(pai_pwd) --vc $(pai_virtual_cluster) --nni_docker_image $(docker_image) --data_dir $(data_dir) --output_dir $(output_dir) --nni_manager_ip $(nni_manager_ip) - python nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts pai --exclude multi-phase,smac,bohb + python nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts pai --exclude multi-phase,smac,bohb,nnicli displayName: 'Examples and advanced features 
tests on pai' \ No newline at end of file diff --git a/test/pipelines/pipelines-it-remote-windows.yml b/test/pipelines/pipelines-it-remote-windows.yml index 73f5950ba4..b70438511d 100644 --- a/test/pipelines/pipelines-it-remote-windows.yml +++ b/test/pipelines/pipelines-it-remote-windows.yml @@ -44,7 +44,7 @@ jobs: cd test python nni_test/nnitest/generate_ts_config.py --ts remote --remote_user $(docker_user) --remote_host $(remote_host) --remote_port $(Get-Content port) --remote_pwd $(docker_pwd) --nni_manager_ip $(nni_manager_ip) Get-Content config/training_service.yml - python nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts remote --exclude cifar10,smac,bohb + python nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts remote --exclude cifar10,smac,bohb,nnicli displayName: 'integration test' - task: SSH@0 inputs: From 468a62a9a68e45000eb0e0fff48763b05cef983c Mon Sep 17 00:00:00 2001 From: chicm-ms <38930155+chicm-ms@users.noreply.github.com> Date: Fri, 20 Mar 2020 12:26:47 +0800 Subject: [PATCH 26/38] Add nnictl foreground test (#2200) --- test/config/integration_tests.yml | 6 ++++++ test/nni_test/nnitest/foreground.py | 25 +++++++++++++++++++++++++ 2 files changed, 31 insertions(+) create mode 100644 test/nni_test/nnitest/foreground.py diff --git a/test/config/integration_tests.yml b/test/config/integration_tests.yml index 171b06d766..b8f681ecd2 100644 --- a/test/config/integration_tests.yml +++ b/test/config/integration_tests.yml @@ -97,6 +97,12 @@ testCases: validator: class: NnicliValidator +- name: foreground + configFile: test/config/examples/sklearn-regression.yml + launchCommand: python3 nni_test/nnitest/foreground.py --config $configFile --timeout 45 + stopCommand: + experimentStatusCheck: False + # Experiment resume test part 1 - name: nnictl-resume-1 configFile: test/config/examples/sklearn-regression.yml diff --git a/test/nni_test/nnitest/foreground.py b/test/nni_test/nnitest/foreground.py new file mode 
100644 index 0000000000..4bfe6c173b --- /dev/null +++ b/test/nni_test/nnitest/foreground.py @@ -0,0 +1,25 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +import subprocess +import argparse +import time +import shlex +import signal + +def test_foreground(args): + launch_command = 'nnictl create --config {} --foreground'.format(args.config) + print('nnictl foreground launch command: ', launch_command, flush=True) + + proc = subprocess.Popen(shlex.split(launch_command)) + + time.sleep(args.timeout) + proc.send_signal(signal.SIGINT) + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument("--config", type=str, required=True) + parser.add_argument("--timeout", type=int, default=45) + args = parser.parse_args() + + test_foreground(args) From d21e95f5facc80368b9ffc10370aff71164acd85 Mon Sep 17 00:00:00 2001 From: chicm-ms <38930155+chicm-ms@users.noreply.github.com> Date: Fri, 20 Mar 2020 14:55:45 +0800 Subject: [PATCH 27/38] add debug info (#2201) --- test/nni_test/nnitest/run_tests.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/test/nni_test/nnitest/run_tests.py b/test/nni_test/nnitest/run_tests.py index fba7392e19..7d7607e28b 100644 --- a/test/nni_test/nnitest/run_tests.py +++ b/test/nni_test/nnitest/run_tests.py @@ -143,11 +143,19 @@ def launch_test(config_file, training_service, test_case_config): print(str(datetime.datetime.now()), ' waiting ...', flush=True) while True: time.sleep(3) - if time.time() - bg_time > max_duration+10: + waited_time = time.time() - bg_time + if waited_time > max_duration + 10: + print('waited: {}, max_duration: {}'.format(waited_time, max_duration)) break status = get_experiment_status(STATUS_URL) - if status in ['DONE', 'ERROR'] or get_failed_trial_jobs(TRIAL_JOBS_URL): + if status in ['DONE', 'ERROR']: + print('experiment status:', status) break + num_failed = len(get_failed_trial_jobs(TRIAL_JOBS_URL)) + if num_failed > 0: + 
print('failed jobs: ', num_failed) + break + print(str(datetime.datetime.now()), ' waiting done', flush=True) trial_stats = get_trial_stats(TRIAL_JOBS_URL) From 6bab8eb21b39063b5121d290b0c5d6668239ff6b Mon Sep 17 00:00:00 2001 From: chicm-ms <38930155+chicm-ms@users.noreply.github.com> Date: Fri, 20 Mar 2020 18:34:00 +0800 Subject: [PATCH 28/38] Add platform match support (#2205) --- test/config/assessors/trial.py | 5 +-- test/config/integration_tests.yml | 5 +++ test/nni_test/nnitest/run_tests.py | 36 +++++++++++++------ test/pipelines/pipelines-it-local-windows.yml | 2 +- test/pipelines/pipelines-it-pai-windows.yml | 2 +- .../pipelines/pipelines-it-remote-windows.yml | 2 +- 6 files changed, 35 insertions(+), 17 deletions(-) diff --git a/test/config/assessors/trial.py b/test/config/assessors/trial.py index e4bdee0fd9..ad385cc28c 100644 --- a/test/config/assessors/trial.py +++ b/test/config/assessors/trial.py @@ -7,10 +7,7 @@ if __name__ == '__main__': print('trial start') - if random.random() > 0.5: - up = True - else: - up = False + up = random.random() > 0.5 v = 0.5 nni.get_next_parameter() for i in range(20): diff --git a/test/config/integration_tests.yml b/test/config/integration_tests.yml index b8f681ecd2..58b1b6cd3f 100644 --- a/test/config/integration_tests.yml +++ b/test/config/integration_tests.yml @@ -3,6 +3,7 @@ defaultTestCaseConfig: launchCommand: nnictl create --config $configFile stopCommand: nnictl stop experimentStatusCheck: True + platform: linux darwin win32 testCases: ####################################################################### @@ -96,12 +97,14 @@ testCases: stopCommand: python3 -c 'import nnicli as nc; nc.stop_nni()' validator: class: NnicliValidator + platform: linux darwin - name: foreground configFile: test/config/examples/sklearn-regression.yml launchCommand: python3 nni_test/nnitest/foreground.py --config $configFile --timeout 45 stopCommand: experimentStatusCheck: False + platform: linux darwin # Experiment resume test part 1 
- name: nnictl-resume-1 @@ -167,6 +170,7 @@ testCases: - name: tuner-smac configFile: test/config/tuners/mnist-annotation-smac.yml + platform: linux darwin - name: tuner-tpe configFile: test/config/tuners/mnist-annotation-tpe.yml @@ -176,6 +180,7 @@ testCases: - name: tuner-bohb configFile: test/config/tuners/mnist-bohb.yml + platform: linux darwin - name: tuner-gp configFile: test/config/tuners/mnist-gp.yml diff --git a/test/nni_test/nnitest/run_tests.py b/test/nni_test/nnitest/run_tests.py index 7d7607e28b..5785e214ff 100644 --- a/test/nni_test/nnitest/run_tests.py +++ b/test/nni_test/nnitest/run_tests.py @@ -14,7 +14,7 @@ from utils import setup_experiment, get_experiment_status, get_yml_content, dump_yml_content, get_experiment_id, \ parse_max_duration_time, get_trial_stats, deep_update, print_trial_job_log, get_failed_trial_jobs -from utils import GREEN, RED, CLEAR, STATUS_URL, TRIAL_JOBS_URL, EXPERIMENT_URL, REST_ENDPOINT +from utils import GREEN, RED, CLEAR, STATUS_URL, TRIAL_JOBS_URL, EXPERIMENT_URL, REST_ENDPOINT, detect_port import validators it_variables = {} @@ -63,12 +63,6 @@ def prepare_config_file(test_case_config, it_config, args): return new_config_file def run_test_case(test_case_config, it_config, args): - # fill test case default config - for k in it_config['defaultTestCaseConfig']: - if k not in test_case_config: - test_case_config[k] = it_config['defaultTestCaseConfig'][k] - print(json.dumps(test_case_config, indent=4)) - new_config_file = prepare_config_file(test_case_config, it_config, args) # set configFile variable it_variables['$configFile'] = new_config_file @@ -96,7 +90,6 @@ def invoke_validator(test_case_config, nni_source_dir): validator(REST_ENDPOINT, None, nni_source_dir, **kwargs) def get_max_values(config_file): - '''Get maxExecDuration and maxTrialNum of experiment''' experiment_config = get_yml_content(config_file) return parse_max_duration_time(experiment_config['maxExecDuration']), experiment_config['maxTrialNum'] @@ -118,7 
+111,6 @@ def get_command(test_case_config, commandKey): return command def launch_test(config_file, training_service, test_case_config): - '''run test per configuration file''' launch_command = get_command(test_case_config, 'launchCommand') print('launch command: ', launch_command, flush=True) @@ -181,6 +173,19 @@ def case_included(name, cases): return True return False +def wait_for_port_available(port, timeout): + begin_time = time.time() + while True: + if not detect_port(port): + return + if time.time() - begin_time > timeout: + msg = 'port {} is not available in {} seconds.'.format(port, timeout) + raise RuntimeError(msg) + time.sleep(5) + +def match_platform(test_case_config): + return sys.platform in test_case_config['platform'].split(' ') + def run(args): it_config = get_yml_content(args.config) @@ -191,8 +196,19 @@ def run(args): continue if args.cases and not case_included(name, args.cases): continue + + # fill test case default config + for k in it_config['defaultTestCaseConfig']: + if k not in test_case_config: + test_case_config[k] = it_config['defaultTestCaseConfig'][k] + print(json.dumps(test_case_config, indent=4)) + + if not match_platform(test_case_config): + print('skipped {}, platform {} not match [{}]'.format(name, sys.platform, test_case_config['platform'])) + continue + + wait_for_port_available(8080, 30) print('{}Testing: {}{}'.format(GREEN, name, CLEAR)) - time.sleep(5) begin_time = time.time() run_test_case(test_case_config, it_config, args) diff --git a/test/pipelines/pipelines-it-local-windows.yml b/test/pipelines/pipelines-it-local-windows.yml index 8d79ab6cc0..eb347c9766 100644 --- a/test/pipelines/pipelines-it-local-windows.yml +++ b/test/pipelines/pipelines-it-local-windows.yml @@ -22,5 +22,5 @@ jobs: displayName: 'Naive test' - script: | cd test - python nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts local --exclude smac,bohb,nnicli + python nni_test/nnitest/run_tests.py --config 
config/integration_tests.yml --ts local displayName: 'Integration tests' diff --git a/test/pipelines/pipelines-it-pai-windows.yml b/test/pipelines/pipelines-it-pai-windows.yml index 39a76a768e..f1fc6d1f05 100644 --- a/test/pipelines/pipelines-it-pai-windows.yml +++ b/test/pipelines/pipelines-it-pai-windows.yml @@ -64,5 +64,5 @@ jobs: set PATH=$(ENV_PATH) python --version python nni_test/nnitest/generate_ts_config.py --ts pai --pai_host $(pai_host) --pai_user $(pai_user) --pai_pwd $(pai_pwd) --vc $(pai_virtual_cluster) --nni_docker_image $(docker_image) --data_dir $(data_dir) --output_dir $(output_dir) --nni_manager_ip $(nni_manager_ip) - python nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts pai --exclude multi-phase,smac,bohb,nnicli + python nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts pai --exclude multi-phase displayName: 'Examples and advanced features tests on pai' \ No newline at end of file diff --git a/test/pipelines/pipelines-it-remote-windows.yml b/test/pipelines/pipelines-it-remote-windows.yml index b70438511d..c43e162688 100644 --- a/test/pipelines/pipelines-it-remote-windows.yml +++ b/test/pipelines/pipelines-it-remote-windows.yml @@ -44,7 +44,7 @@ jobs: cd test python nni_test/nnitest/generate_ts_config.py --ts remote --remote_user $(docker_user) --remote_host $(remote_host) --remote_port $(Get-Content port) --remote_pwd $(docker_pwd) --nni_manager_ip $(nni_manager_ip) Get-Content config/training_service.yml - python nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts remote --exclude cifar10,smac,bohb,nnicli + python nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts remote --exclude cifar10 displayName: 'integration test' - task: SSH@0 inputs: From ee4074a067f471100ce5b3c952205bf68c7922a2 Mon Sep 17 00:00:00 2001 From: chicm-ms <38930155+chicm-ms@users.noreply.github.com> Date: Fri, 20 Mar 2020 22:43:16 +0800 Subject: [PATCH 29/38] updates trial 
concurrency (#2206) --- test/config/assessors/curvefitting.yml | 4 ++-- test/config/assessors/medianstop.yml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/test/config/assessors/curvefitting.yml b/test/config/assessors/curvefitting.yml index c6c0f393ca..d644ba946e 100644 --- a/test/config/assessors/curvefitting.yml +++ b/test/config/assessors/curvefitting.yml @@ -1,8 +1,8 @@ authorName: nni experimentName: default_test maxExecDuration: 5m -maxTrialNum: 8 -trialConcurrency: 8 +maxTrialNum: 4 +trialConcurrency: 4 searchSpacePath: search_space.json tuner: diff --git a/test/config/assessors/medianstop.yml b/test/config/assessors/medianstop.yml index 2a2983d500..c5d7407572 100644 --- a/test/config/assessors/medianstop.yml +++ b/test/config/assessors/medianstop.yml @@ -1,8 +1,8 @@ authorName: nni experimentName: default_test maxExecDuration: 5m -maxTrialNum: 8 -trialConcurrency: 8 +maxTrialNum: 4 +trialConcurrency: 4 searchSpacePath: search_space.json tuner: From 468cf432dad13b0041c9062d747080687c8fde37 Mon Sep 17 00:00:00 2001 From: chicm-ms <38930155+chicm-ms@users.noreply.github.com> Date: Sat, 21 Mar 2020 17:44:39 +0800 Subject: [PATCH 30/38] refactor tuner tests (#2207) --- azure-pipelines.yml | 22 +-- test/config/assessors/curvefitting.yml | 8 +- test/config/assessors/medianstop.yml | 8 +- test/config/assessors/search_space.json | 7 - test/config/assessors/trial.py | 22 --- test/config/integration_tests.yml | 22 +-- test/config/naive_trial/search_space.json | 17 +++ test/config/naive_trial/trial.py | 29 ++++ test/config/pr_tests.yml | 142 ++++++++++++++++++ .../batchtuner_search_space.json | 8 - test/config/tuner_naive_trial_test/local.yml | 20 --- .../tuner_naive_trial_test/local_win32.yml | 20 --- .../tuner_naive_trial_test/naive_trial.py | 10 -- .../tuner_naive_trial_test/search_space.json | 7 - ...mnist-annotation-anneal.yml => anneal.yml} | 0 .../{mnist-batchtuner.yml => batch.yml} | 0 .../tuners/{mnist-bohb.yml => bohb.yml} | 0 
...annotation-evolution.yml => evolution.yml} | 0 test/config/tuners/{mnist-gp.yml => gp.yml} | 0 .../{mnist-gridsearch.yml => gridsearch.yml} | 0 .../{mnist-hyperband.yml => hyperband.yml} | 0 .../tuners/{mnist-metis.yml => metis.yml} | 0 ...mnist-annotation-random.yml => random.yml} | 0 .../{mnist-annotation-smac.yml => smac.yml} | 0 .../{mnist-annotation-tpe.yml => tpe.yml} | 0 test/nni_test/nnitest/tuner_test.py | 88 ----------- 26 files changed, 210 insertions(+), 220 deletions(-) delete mode 100644 test/config/assessors/search_space.json delete mode 100644 test/config/assessors/trial.py create mode 100644 test/config/naive_trial/search_space.json create mode 100644 test/config/naive_trial/trial.py create mode 100644 test/config/pr_tests.yml delete mode 100644 test/config/tuner_naive_trial_test/batchtuner_search_space.json delete mode 100644 test/config/tuner_naive_trial_test/local.yml delete mode 100644 test/config/tuner_naive_trial_test/local_win32.yml delete mode 100644 test/config/tuner_naive_trial_test/naive_trial.py delete mode 100644 test/config/tuner_naive_trial_test/search_space.json rename test/config/tuners/{mnist-annotation-anneal.yml => anneal.yml} (100%) rename test/config/tuners/{mnist-batchtuner.yml => batch.yml} (100%) rename test/config/tuners/{mnist-bohb.yml => bohb.yml} (100%) rename test/config/tuners/{mnist-annotation-evolution.yml => evolution.yml} (100%) rename test/config/tuners/{mnist-gp.yml => gp.yml} (100%) rename test/config/tuners/{mnist-gridsearch.yml => gridsearch.yml} (100%) rename test/config/tuners/{mnist-hyperband.yml => hyperband.yml} (100%) rename test/config/tuners/{mnist-metis.yml => metis.yml} (100%) rename test/config/tuners/{mnist-annotation-random.yml => random.yml} (100%) rename test/config/tuners/{mnist-annotation-smac.yml => smac.yml} (100%) rename test/config/tuners/{mnist-annotation-tpe.yml => tpe.yml} (100%) delete mode 100644 test/nni_test/nnitest/tuner_test.py diff --git a/azure-pipelines.yml 
b/azure-pipelines.yml index 51deec52b2..f57127fe0f 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -60,15 +60,7 @@ jobs: displayName: 'Naive test' - script: | cd test - python3 nni_test/nnitest/tuner_test.py - displayName: 'Built-in tuners / assessors with naive trial tests' - - script: | - cd test - python3 nni_test/nnitest/run_tests.py --config config/integration_tests.yml --cases metrics - displayName: 'Trial job metrics test' - - script: | - cd test - python3 nni_test/nnitest/run_tests.py --config config/integration_tests.yml --cases nnicli + python3 nni_test/nnitest/run_tests.py --config config/pr_tests.yml displayName: 'nnicli test' - script: | cd docs/en_US/ @@ -109,11 +101,7 @@ jobs: displayName: 'Naive test' - script: | cd test - python3 nni_test/nnitest/tuner_test.py - displayName: 'Built-in tuners / assessors with naive trial tests' - - script: | - cd test - python3 nni_test/nnitest/run_tests.py --config config/integration_tests.yml --cases nnicli + python3 nni_test/nnitest/run_tests.py --config config/pr_tests.yml displayName: 'nnicli test' - job: 'basic_test_pr_Windows' @@ -141,9 +129,5 @@ jobs: displayName: 'unit test' - script: | cd test - python nni_test/nnitest/tuner_test.py - displayName: 'Built-in tuners / assessors with naive trial tests' - - script: | - cd test - python nni_test/nnitest/run_tests.py --config config/integration_tests.yml --cases nnicli + python nni_test/nnitest/run_tests.py --config config/pr_tests.yml displayName: 'nnicli test' diff --git a/test/config/assessors/curvefitting.yml b/test/config/assessors/curvefitting.yml index d644ba946e..774f96adfd 100644 --- a/test/config/assessors/curvefitting.yml +++ b/test/config/assessors/curvefitting.yml @@ -1,9 +1,9 @@ authorName: nni experimentName: default_test maxExecDuration: 5m -maxTrialNum: 4 -trialConcurrency: 4 -searchSpacePath: search_space.json +maxTrialNum: 8 +trialConcurrency: 8 +searchSpacePath: ../naive_trial/search_space.json tuner: builtinTunerName: TPE @@ 
-17,7 +17,7 @@ assessor: start_step: 6 threshold: 0.95 trial: - codeDir: ./ + codeDir: ../naive_trial command: python3 trial.py gpuNum: 0 diff --git a/test/config/assessors/medianstop.yml b/test/config/assessors/medianstop.yml index c5d7407572..672b8e4cb4 100644 --- a/test/config/assessors/medianstop.yml +++ b/test/config/assessors/medianstop.yml @@ -1,9 +1,9 @@ authorName: nni experimentName: default_test maxExecDuration: 5m -maxTrialNum: 4 -trialConcurrency: 4 -searchSpacePath: search_space.json +maxTrialNum: 8 +trialConcurrency: 8 +searchSpacePath: ../naive_trial/search_space.json tuner: builtinTunerName: TPE @@ -16,7 +16,7 @@ assessor: optimize_mode: maximize trial: - codeDir: ./ + codeDir: ../naive_trial command: python3 trial.py gpuNum: 0 diff --git a/test/config/assessors/search_space.json b/test/config/assessors/search_space.json deleted file mode 100644 index 0b7ef39ec3..0000000000 --- a/test/config/assessors/search_space.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "test": - { - "_type" : "choice", - "_value" : [1, 100] - } -} diff --git a/test/config/assessors/trial.py b/test/config/assessors/trial.py deleted file mode 100644 index ad385cc28c..0000000000 --- a/test/config/assessors/trial.py +++ /dev/null @@ -1,22 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT license. 
- -import random -import time -import nni - -if __name__ == '__main__': - print('trial start') - up = random.random() > 0.5 - v = 0.5 - nni.get_next_parameter() - for i in range(20): - time.sleep(1) - for _ in range(2): - if up: - v *= 1.1 - else: - v *= 0.9 - nni.report_intermediate_result(v) - nni.report_final_result(v) - print('trial done') diff --git a/test/config/integration_tests.yml b/test/config/integration_tests.yml index 58b1b6cd3f..bd52a69596 100644 --- a/test/config/integration_tests.yml +++ b/test/config/integration_tests.yml @@ -160,36 +160,36 @@ testCases: # nni tuners test ######################################################################### - name: tuner-annel - configFile: test/config/tuners/mnist-annotation-anneal.yml + configFile: test/config/tuners/anneal.yml - name: tuner-evolution - configFile: test/config/tuners/mnist-annotation-evolution.yml + configFile: test/config/tuners/evolution.yml - name: tuner-random - configFile: test/config/tuners/mnist-annotation-random.yml + configFile: test/config/tuners/random.yml - name: tuner-smac - configFile: test/config/tuners/mnist-annotation-smac.yml + configFile: test/config/tuners/smac.yml platform: linux darwin - name: tuner-tpe - configFile: test/config/tuners/mnist-annotation-tpe.yml + configFile: test/config/tuners/tpe.yml - name: tuner-batch - configFile: test/config/tuners/mnist-batchtuner.yml + configFile: test/config/tuners/batch.yml - name: tuner-bohb - configFile: test/config/tuners/mnist-bohb.yml + configFile: test/config/tuners/bohb.yml platform: linux darwin - name: tuner-gp - configFile: test/config/tuners/mnist-gp.yml + configFile: test/config/tuners/gp.yml - name: tuner-grid - configFile: test/config/tuners/mnist-gridsearch.yml + configFile: test/config/tuners/gridsearch.yml - name: tuner-hyperband - configFile: test/config/tuners/mnist-hyperband.yml + configFile: test/config/tuners/hyperband.yml - name: tuner-metis - configFile: test/config/tuners/mnist-metis.yml + configFile: 
test/config/tuners/metis.yml diff --git a/test/config/naive_trial/search_space.json b/test/config/naive_trial/search_space.json new file mode 100644 index 0000000000..5c2084f310 --- /dev/null +++ b/test/config/naive_trial/search_space.json @@ -0,0 +1,17 @@ +{ + "k": + { + "_type" : "randint", + "_value" : [0, 4] + }, + "d": + { + "_type" : "choice", + "_value" : [-1, 1] + }, + "n": + { + "_type" : "uniform", + "_value" : [0, 0.2] + } +} diff --git a/test/config/naive_trial/trial.py b/test/config/naive_trial/trial.py new file mode 100644 index 0000000000..1d83fcacf6 --- /dev/null +++ b/test/config/naive_trial/trial.py @@ -0,0 +1,29 @@ +import random +import time +import math +import nni + +curve_func = { + 0: lambda x: x, + 1: lambda x: x * x, + 2: lambda x: math.pow(x, 0.5), + 3: lambda x: math.tanh(x) +} + +if __name__ == '__main__': + print('trial start') + + params = nni.get_next_parameter() + print('params:', params) + epochs = 20 + + for i in range(epochs): + v = curve_func[params['k']](i / epochs) + v += v * (random.random() * params['n']) + v *= params['d'] + nni.report_intermediate_result(v) + + if i % 5 == 0: + time.sleep(1) + nni.report_final_result(v) + print('trial done') diff --git a/test/config/pr_tests.yml b/test/config/pr_tests.yml new file mode 100644 index 0000000000..1ddcb70631 --- /dev/null +++ b/test/config/pr_tests.yml @@ -0,0 +1,142 @@ + +defaultTestCaseConfig: + launchCommand: nnictl create --config $configFile + stopCommand: nnictl stop + experimentStatusCheck: True + platform: linux darwin win32 + +testCases: + +######################################################################### +# nni features test +######################################################################### +- name: metrics-float + configFile: test/config/metrics_test/config.yml + config: + maxTrialNum: 1 + trialConcurrency: 1 + validator: + class: MetricsValidator + kwargs: + expected_result_file: expected_metrics.json + +# to be enabled +#- name: metrics-dict +# 
configFile: test/config/metrics_test/config_dict_metrics.yml +# config: +# maxTrialNum: 1 +# trialConcurrency: 1 +# validator: +# class: MetricsValidator +# kwargs: +# expected_result_file: expected_metrics_dict.json + +- name: nnicli + configFile: test/config/examples/sklearn-regression.yml + config: + maxTrialNum: 2 + trialConcurrency: 2 + launchCommand: python3 -c 'import nnicli as nc; nc.start_nni("$configFile")' + stopCommand: python3 -c 'import nnicli as nc; nc.stop_nni()' + validator: + class: NnicliValidator + platform: linux darwin + +- name: multi-thread + configFile: test/config/multi_thread/config.yml + +- name: multi-phase-batch + configFile: test/config/multi_phase/batch.yml + config: + # for batch tuner, maxTrialNum can not exceed length of search space + maxTrialNum: 2 + trialConcurrency: 2 + +######################################################################### +# nni assessor test +######################################################################### +- name: assessor-curvefitting + configFile: test/config/assessors/curvefitting.yml + +- name: assessor-medianstop + configFile: test/config/assessors/medianstop.yml + +######################################################################### +# nni tuners test +######################################################################### +- name: tuner-annel + configFile: test/config/tuners/anneal.yml + config: + maxTrialNum: 2 + trialConcurrency: 2 + useAnnotation: False + searchSpacePath: ../naive_trial/search_space.json + trial: + codeDir: ../naive_trial + command: python3 trial.py + +- name: tuner-evolution + configFile: test/config/tuners/evolution.yml + config: + maxTrialNum: 2 + trialConcurrency: 2 + useAnnotation: False + searchSpacePath: ../naive_trial/search_space.json + trial: + codeDir: ../naive_trial + command: python3 trial.py + +- name: tuner-random + configFile: test/config/tuners/random.yml + config: + maxTrialNum: 2 + trialConcurrency: 2 + useAnnotation: False + searchSpacePath: 
../naive_trial/search_space.json + trial: + codeDir: ../naive_trial + command: python3 trial.py + +- name: tuner-tpe + configFile: test/config/tuners/tpe.yml + config: + maxTrialNum: 2 + trialConcurrency: 2 + useAnnotation: False + searchSpacePath: ../naive_trial/search_space.json + trial: + codeDir: ../naive_trial + command: python3 trial.py + +- name: tuner-batch + configFile: test/config/tuners/batch.yml + config: + maxTrialNum: 2 + trialConcurrency: 2 + useAnnotation: False + searchSpacePath: ../naive_trial/search_space.json + trial: + codeDir: ../naive_trial + command: python3 trial.py + +- name: tuner-gp + configFile: test/config/tuners/gp.yml + config: + maxTrialNum: 2 + trialConcurrency: 2 + useAnnotation: False + searchSpacePath: ../naive_trial/search_space.json + trial: + codeDir: ../naive_trial + command: python3 trial.py + +- name: tuner-grid + configFile: test/config/tuners/gridsearch.yml + config: + maxTrialNum: 2 + trialConcurrency: 2 + useAnnotation: False + searchSpacePath: ../naive_trial/search_space.json + trial: + codeDir: ../naive_trial + command: python3 trial.py diff --git a/test/config/tuner_naive_trial_test/batchtuner_search_space.json b/test/config/tuner_naive_trial_test/batchtuner_search_space.json deleted file mode 100644 index 5d3beeee85..0000000000 --- a/test/config/tuner_naive_trial_test/batchtuner_search_space.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "combine_params": - { - "_type" : "choice", - "_value" : [{"x": 1}, - {"x": 100}] - } -} diff --git a/test/config/tuner_naive_trial_test/local.yml b/test/config/tuner_naive_trial_test/local.yml deleted file mode 100644 index e5598914ad..0000000000 --- a/test/config/tuner_naive_trial_test/local.yml +++ /dev/null @@ -1,20 +0,0 @@ -assessor: - builtinAssessorName: Medianstop - classArgs: - optimize_mode: maximize -authorName: nni -experimentName: test_sdk -maxExecDuration: 1h -maxTrialNum: 2 -searchSpacePath: search_space.json -trainingServicePlatform: local -trial: - codeDir: . 
- command: python3 naive_trial.py - gpuNum: 0 -trialConcurrency: 2 -tuner: - builtinTunerName: Evolution - classArgs: - optimize_mode: maximize -useAnnotation: false diff --git a/test/config/tuner_naive_trial_test/local_win32.yml b/test/config/tuner_naive_trial_test/local_win32.yml deleted file mode 100644 index 5cd144a789..0000000000 --- a/test/config/tuner_naive_trial_test/local_win32.yml +++ /dev/null @@ -1,20 +0,0 @@ -assessor: - builtinAssessorName: Medianstop - classArgs: - optimize_mode: maximize -authorName: nni -experimentName: test_sdk -maxExecDuration: 1h -maxTrialNum: 2 -searchSpacePath: search_space.json -trainingServicePlatform: local -trial: - codeDir: . - command: python naive_trial.py - gpuNum: 0 -trialConcurrency: 2 -tuner: - builtinTunerName: Evolution - classArgs: - optimize_mode: maximize -useAnnotation: false diff --git a/test/config/tuner_naive_trial_test/naive_trial.py b/test/config/tuner_naive_trial_test/naive_trial.py deleted file mode 100644 index ba71dedff2..0000000000 --- a/test/config/tuner_naive_trial_test/naive_trial.py +++ /dev/null @@ -1,10 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT license. 
- -import nni - -params = nni.get_next_parameter() -print('params:', params) -x = params['x'] - -nni.report_final_result(x) diff --git a/test/config/tuner_naive_trial_test/search_space.json b/test/config/tuner_naive_trial_test/search_space.json deleted file mode 100644 index f20e76e0c5..0000000000 --- a/test/config/tuner_naive_trial_test/search_space.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "x": - { - "_type" : "choice", - "_value" : [1, 100] - } -} diff --git a/test/config/tuners/mnist-annotation-anneal.yml b/test/config/tuners/anneal.yml similarity index 100% rename from test/config/tuners/mnist-annotation-anneal.yml rename to test/config/tuners/anneal.yml diff --git a/test/config/tuners/mnist-batchtuner.yml b/test/config/tuners/batch.yml similarity index 100% rename from test/config/tuners/mnist-batchtuner.yml rename to test/config/tuners/batch.yml diff --git a/test/config/tuners/mnist-bohb.yml b/test/config/tuners/bohb.yml similarity index 100% rename from test/config/tuners/mnist-bohb.yml rename to test/config/tuners/bohb.yml diff --git a/test/config/tuners/mnist-annotation-evolution.yml b/test/config/tuners/evolution.yml similarity index 100% rename from test/config/tuners/mnist-annotation-evolution.yml rename to test/config/tuners/evolution.yml diff --git a/test/config/tuners/mnist-gp.yml b/test/config/tuners/gp.yml similarity index 100% rename from test/config/tuners/mnist-gp.yml rename to test/config/tuners/gp.yml diff --git a/test/config/tuners/mnist-gridsearch.yml b/test/config/tuners/gridsearch.yml similarity index 100% rename from test/config/tuners/mnist-gridsearch.yml rename to test/config/tuners/gridsearch.yml diff --git a/test/config/tuners/mnist-hyperband.yml b/test/config/tuners/hyperband.yml similarity index 100% rename from test/config/tuners/mnist-hyperband.yml rename to test/config/tuners/hyperband.yml diff --git a/test/config/tuners/mnist-metis.yml b/test/config/tuners/metis.yml similarity index 100% rename from 
test/config/tuners/mnist-metis.yml rename to test/config/tuners/metis.yml diff --git a/test/config/tuners/mnist-annotation-random.yml b/test/config/tuners/random.yml similarity index 100% rename from test/config/tuners/mnist-annotation-random.yml rename to test/config/tuners/random.yml diff --git a/test/config/tuners/mnist-annotation-smac.yml b/test/config/tuners/smac.yml similarity index 100% rename from test/config/tuners/mnist-annotation-smac.yml rename to test/config/tuners/smac.yml diff --git a/test/config/tuners/mnist-annotation-tpe.yml b/test/config/tuners/tpe.yml similarity index 100% rename from test/config/tuners/mnist-annotation-tpe.yml rename to test/config/tuners/tpe.yml diff --git a/test/nni_test/nnitest/tuner_test.py b/test/nni_test/nnitest/tuner_test.py deleted file mode 100644 index 78168f1e0b..0000000000 --- a/test/nni_test/nnitest/tuner_test.py +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT license. - -import sys -import os.path as osp -import subprocess -import sys -import time -import traceback - -from utils import get_yml_content, dump_yml_content, setup_experiment, get_nni_log_path, is_experiment_done -from utils import GREEN, RED, CLEAR, EXPERIMENT_URL - -TUNER_LIST = ['GridSearch', 'BatchTuner', 'TPE', 'Random', 'Anneal', 'Evolution'] -ASSESSOR_LIST = ['Medianstop'] - - -def get_config_file_path(): - if sys.platform == 'win32': - config_file = osp.join('config', 'tuner_naive_trial_test', 'local_win32.yml') - else: - config_file = osp.join('config', 'tuner_naive_trial_test', 'local.yml') - return config_file - -def switch(dispatch_type, dispatch_name): - '''Change dispatch in config.yml''' - config_path = get_config_file_path() - experiment_config = get_yml_content(config_path) - if dispatch_name in ['GridSearch', 'BatchTuner', 'Random']: - experiment_config[dispatch_type.lower()] = { - 'builtin' + dispatch_type + 'Name': dispatch_name - } - else: - experiment_config[dispatch_type.lower()] = 
{ - 'builtin' + dispatch_type + 'Name': dispatch_name, - 'classArgs': { - 'optimize_mode': 'maximize' - } - } - if dispatch_name == 'BatchTuner': - experiment_config['searchSpacePath'] = 'batchtuner_search_space.json' - else: - experiment_config['searchSpacePath'] = 'search_space.json' - dump_yml_content(config_path, experiment_config) - -def test_builtin_dispatcher(dispatch_type, dispatch_name): - '''test a dispatcher whose type is dispatch_type and name is dispatch_name''' - switch(dispatch_type, dispatch_name) - - print('Testing %s...' % dispatch_name) - proc = subprocess.run(['nnictl', 'create', '--config', get_config_file_path()]) - assert proc.returncode == 0, '`nnictl create` failed with code %d' % proc.returncode - - nnimanager_log_path = get_nni_log_path(EXPERIMENT_URL) - - for _ in range(20): - time.sleep(3) - # check if experiment is done - experiment_status = is_experiment_done(nnimanager_log_path) - if experiment_status: - break - - assert experiment_status, 'Failed to finish in 1 min' - -def run(dispatch_type): - '''test all dispatchers whose type is dispatch_type''' - assert dispatch_type in ['Tuner', 'Assessor'], 'Unsupported dispatcher type: %s' % (dispatch_type) - dipsatcher_list = TUNER_LIST if dispatch_type == 'Tuner' else ASSESSOR_LIST - for dispatcher_name in dipsatcher_list: - try: - # Sleep here to make sure previous stopped exp has enough time to exit to avoid port conflict - time.sleep(6) - test_builtin_dispatcher(dispatch_type, dispatcher_name) - print(GREEN + 'Test %s %s: TEST PASS' % (dispatcher_name, dispatch_type) + CLEAR) - except Exception as error: - print(RED + 'Test %s %s: TEST FAIL' % (dispatcher_name, dispatch_type) + CLEAR) - print('%r' % error) - traceback.print_exc() - raise error - finally: - subprocess.run(['nnictl', 'stop']) - -if __name__ == '__main__': - installed = (sys.argv[-1] != '--preinstall') - setup_experiment(installed) - - run('Tuner') - run('Assessor') From 0b9de9077245ac63cdcbdb416f07dfca50ee8831 Mon Sep 17 
00:00:00 2001 From: chicm-ms <38930155+chicm-ms@users.noreply.github.com> Date: Sat, 21 Mar 2020 18:06:02 +0800 Subject: [PATCH 31/38] rename pr tests (#2208) --- azure-pipelines.yml | 6 +++--- test/nni_test/nnitest/run_tests.py | 4 ++-- test/nni_test/nnitest/utils.py | 17 ----------------- 3 files changed, 5 insertions(+), 22 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index f57127fe0f..11cb22688f 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -61,7 +61,7 @@ jobs: - script: | cd test python3 nni_test/nnitest/run_tests.py --config config/pr_tests.yml - displayName: 'nnicli test' + displayName: 'Simple test' - script: | cd docs/en_US/ sphinx-build -M html . _build -W @@ -102,7 +102,7 @@ jobs: - script: | cd test python3 nni_test/nnitest/run_tests.py --config config/pr_tests.yml - displayName: 'nnicli test' + displayName: 'Simple test' - job: 'basic_test_pr_Windows' pool: @@ -130,4 +130,4 @@ jobs: - script: | cd test python nni_test/nnitest/run_tests.py --config config/pr_tests.yml - displayName: 'nnicli test' + displayName: 'Simple test' diff --git a/test/nni_test/nnitest/run_tests.py b/test/nni_test/nnitest/run_tests.py index 5785e214ff..1c9d79cdd0 100644 --- a/test/nni_test/nnitest/run_tests.py +++ b/test/nni_test/nnitest/run_tests.py @@ -13,7 +13,7 @@ import ruamel.yaml as yaml from utils import setup_experiment, get_experiment_status, get_yml_content, dump_yml_content, get_experiment_id, \ - parse_max_duration_time, get_trial_stats, deep_update, print_trial_job_log, get_failed_trial_jobs + parse_max_duration_time, get_trial_stats, deep_update, print_trial_job_log, get_failed_trial_jobs, get_experiment_dir from utils import GREEN, RED, CLEAR, STATUS_URL, TRIAL_JOBS_URL, EXPERIMENT_URL, REST_ENDPOINT, detect_port import validators @@ -87,7 +87,7 @@ def invoke_validator(test_case_config, nni_source_dir): validator = validators.__dict__[validator_config.get('class')]() kwargs = validator_config.get('kwargs', {}) 
print('kwargs:', kwargs) - validator(REST_ENDPOINT, None, nni_source_dir, **kwargs) + validator(REST_ENDPOINT, get_experiment_dir(EXPERIMENT_URL), nni_source_dir, **kwargs) def get_max_values(config_file): experiment_config = get_yml_content(config_file) diff --git a/test/nni_test/nnitest/utils.py b/test/nni_test/nnitest/utils.py index e16b4dc949..a3b4723d47 100644 --- a/test/nni_test/nnitest/utils.py +++ b/test/nni_test/nnitest/utils.py @@ -128,23 +128,6 @@ def print_trial_job_log(training_service, trial_jobs_url): print(log_file, flush=True) print(log_content, flush=True) -def print_failed_job_log(training_service, trial_jobs_url): - '''Print job log of FAILED trial jobs''' - trial_jobs = get_failed_trial_jobs(trial_jobs_url) - for trial_job in trial_jobs: - if training_service == 'local': - if sys.platform == "win32": - p = trial_job['stderrPath'].split(':') - log_filename = ':'.join([p[-2], p[-1]]) - else: - log_filename = trial_job['stderrPath'].split(':')[-1] - else: - log_filename = os.path.join(get_experiment_dir(EXPERIMENT_URL), 'trials', trial_job['id'], 'stdout_log_collection.log') - with open(log_filename, 'r') as f: - log_content = f.read() - print(log_filename, flush=True) - print(log_content, flush=True) - def parse_max_duration_time(max_exec_duration): unit = max_exec_duration[-1] time = max_exec_duration[:-1] From 315b50a4479762a11a3c966e712ce9142888bb87 Mon Sep 17 00:00:00 2001 From: chicm-ms <38930155+chicm-ms@users.noreply.github.com> Date: Sat, 21 Mar 2020 19:47:48 +0800 Subject: [PATCH 32/38] Fix batch/grid test cases (#2209) --- test/config/pr_tests.yml | 8 ++++---- test/nni_test/nnitest/run_tests.py | 7 +++++-- test/nni_test/nnitest/utils.py | 25 ++++++++++++++----------- 3 files changed, 23 insertions(+), 17 deletions(-) diff --git a/test/config/pr_tests.yml b/test/config/pr_tests.yml index 1ddcb70631..df51edf9f2 100644 --- a/test/config/pr_tests.yml +++ b/test/config/pr_tests.yml @@ -114,10 +114,10 @@ testCases: maxTrialNum: 2 
trialConcurrency: 2 useAnnotation: False - searchSpacePath: ../naive_trial/search_space.json + searchSpacePath: ../naive_trial/search_space_choices.json trial: codeDir: ../naive_trial - command: python3 trial.py + command: python3 trial_choices.py - name: tuner-gp configFile: test/config/tuners/gp.yml @@ -136,7 +136,7 @@ testCases: maxTrialNum: 2 trialConcurrency: 2 useAnnotation: False - searchSpacePath: ../naive_trial/search_space.json + searchSpacePath: ../naive_trial/search_space_choices.json trial: codeDir: ../naive_trial - command: python3 trial.py + command: python3 trial_choices.py diff --git a/test/nni_test/nnitest/run_tests.py b/test/nni_test/nnitest/run_tests.py index 1c9d79cdd0..33c3edf198 100644 --- a/test/nni_test/nnitest/run_tests.py +++ b/test/nni_test/nnitest/run_tests.py @@ -12,8 +12,9 @@ import json import ruamel.yaml as yaml -from utils import setup_experiment, get_experiment_status, get_yml_content, dump_yml_content, get_experiment_id, \ - parse_max_duration_time, get_trial_stats, deep_update, print_trial_job_log, get_failed_trial_jobs, get_experiment_dir +from utils import get_experiment_status, get_yml_content, dump_yml_content, get_experiment_id, \ + parse_max_duration_time, get_trial_stats, deep_update, print_trial_job_log, get_failed_trial_jobs, \ + get_experiment_dir, print_experiment_log from utils import GREEN, RED, CLEAR, STATUS_URL, TRIAL_JOBS_URL, EXPERIMENT_URL, REST_ENDPOINT, detect_port import validators @@ -149,6 +150,8 @@ def launch_test(config_file, training_service, test_case_config): break print(str(datetime.datetime.now()), ' waiting done', flush=True) + if get_experiment_status(STATUS_URL) == 'ERROR': + print_experiment_log(EXPERIMENT_URL) trial_stats = get_trial_stats(TRIAL_JOBS_URL) print(json.dumps(trial_stats, indent=4), flush=True) diff --git a/test/nni_test/nnitest/utils.py b/test/nni_test/nnitest/utils.py index a3b4723d47..c5a6c05ca2 100644 --- a/test/nni_test/nnitest/utils.py +++ b/test/nni_test/nnitest/utils.py @@ 
-111,22 +111,25 @@ def get_failed_trial_jobs(trial_jobs_url): '''Return failed trial jobs''' return get_trial_jobs(trial_jobs_url, 'FAILED') +def print_file_content(filepath): + with open(filepath, 'r') as f: + content = f.read() + print(filepath, flush=True) + print(content, flush=True) + def print_trial_job_log(training_service, trial_jobs_url): - '''Print job log of FAILED trial jobs''' trial_jobs = get_trial_jobs(trial_jobs_url) for trial_job in trial_jobs: - log_files = [] trial_log_dir = os.path.join(get_experiment_dir(EXPERIMENT_URL), 'trials', trial_job['id']) - if training_service == 'local': - log_files.append(os.path.join(trial_log_dir, 'stderr')) - log_files.append(os.path.join(trial_log_dir, 'trial.log')) - else: - log_files.append(os.path.join(trial_log_dir, 'stdout_log_collection.log')) + log_files = ['stderr', 'trial.log'] if training_service == 'local' else ['stdout_log_collection.log'] for log_file in log_files: - with open(log_file, 'r') as f: - log_content = f.read() - print(log_file, flush=True) - print(log_content, flush=True) + print_file_content(os.path.join(trial_log_dir, log_file)) + +def print_experiment_log(experiment_url): + log_dir = get_nni_log_dir(experiment_url) + for log_file in ['dispatcher.log', 'nnimanager.log']: + filepath = os.path.join(log_dir, log_file) + print_file_content(filepath) def parse_max_duration_time(max_exec_duration): unit = max_exec_duration[-1] From ee5b1b7160674d242f82ff498789135e475470a9 Mon Sep 17 00:00:00 2001 From: chicm-ms <38930155+chicm-ms@users.noreply.github.com> Date: Sat, 21 Mar 2020 20:02:38 +0800 Subject: [PATCH 33/38] add missing files (#2210) --- test/config/naive_trial/search_space_choices.json | 7 +++++++ test/config/naive_trial/trial_choices.py | 13 +++++++++++++ 2 files changed, 20 insertions(+) create mode 100644 test/config/naive_trial/search_space_choices.json create mode 100644 test/config/naive_trial/trial_choices.py diff --git a/test/config/naive_trial/search_space_choices.json 
b/test/config/naive_trial/search_space_choices.json new file mode 100644 index 0000000000..6262095eef --- /dev/null +++ b/test/config/naive_trial/search_space_choices.json @@ -0,0 +1,7 @@ +{ + "p": + { + "_type" : "choice", + "_value" : [1, 2, 3, 4] + } +} diff --git a/test/config/naive_trial/trial_choices.py b/test/config/naive_trial/trial_choices.py new file mode 100644 index 0000000000..878c4ef41e --- /dev/null +++ b/test/config/naive_trial/trial_choices.py @@ -0,0 +1,13 @@ +import random +import nni + +if __name__ == '__main__': + print('trial start') + + params = nni.get_next_parameter() + print('params:', params) + + nni.report_intermediate_result(random.random()) + nni.report_final_result(random.random()) + + print('trial done') From 7bce40c4c7f5e8fc47e48c36de5cf15108652b00 Mon Sep 17 00:00:00 2001 From: chicm-ms <38930155+chicm-ms@users.noreply.github.com> Date: Sat, 21 Mar 2020 21:07:45 +0800 Subject: [PATCH 34/38] Refactor naive test (#2211) --- azure-pipelines.yml | 8 ------ test/config/naive_test/local_win32.yml | 26 ------------------ test/config/pr_tests.yml | 10 +++++++ test/nni_test/nnitest/naive_test.py | 27 ++++++++++--------- test/nni_test/nnitest/run_tests.py | 2 +- test/pipelines/pipelines-it-local-windows.yml | 4 --- test/pipelines/pipelines-it-local.yml | 4 --- 7 files changed, 25 insertions(+), 56 deletions(-) delete mode 100644 test/config/naive_test/local_win32.yml diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 11cb22688f..2196ddf555 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -54,10 +54,6 @@ jobs: cd test source scripts/unittest.sh displayName: 'Unit test' - - script: | - cd test - python3 nni_test/nnitest/naive_test.py - displayName: 'Naive test' - script: | cd test python3 nni_test/nnitest/run_tests.py --config config/pr_tests.yml @@ -95,10 +91,6 @@ jobs: cd test source scripts/unittest.sh displayName: 'Unit test' - - script: | - cd test - python3 nni_test/nnitest/naive_test.py - displayName: 'Naive test' 
- script: | cd test python3 nni_test/nnitest/run_tests.py --config config/pr_tests.yml diff --git a/test/config/naive_test/local_win32.yml b/test/config/naive_test/local_win32.yml deleted file mode 100644 index 679b3819e7..0000000000 --- a/test/config/naive_test/local_win32.yml +++ /dev/null @@ -1,26 +0,0 @@ -authorName: nni -experimentName: naive -trialConcurrency: 3 -maxExecDuration: 1h -maxTrialNum: 10 -#choice: local, remote -trainingServicePlatform: local -searchSpacePath: search_space.json -#choice: true, false -useAnnotation: false -tuner: - codeDir: . - classFileName: naive_tuner.py - className: NaiveTuner - classArgs: - optimize_mode: maximize -assessor: - codeDir: . - classFileName: naive_assessor.py - className: NaiveAssessor - classArgs: - optimize_mode: maximize -trial: - command: python naive_trial.py - codeDir: . - gpuNum: 0 diff --git a/test/config/pr_tests.yml b/test/config/pr_tests.yml index df51edf9f2..86040d1f92 100644 --- a/test/config/pr_tests.yml +++ b/test/config/pr_tests.yml @@ -7,6 +7,16 @@ defaultTestCaseConfig: testCases: +######################################################################### +# naive test +######################################################################### + +- name: naive-test + configFile: test/config/naive_test/local.yml + launchCommand: python3 nni_test/nnitest/naive_test.py --config $configFile + experimentStatusCheck: False + stopCommand: + ######################################################################### # nni features test ######################################################################### diff --git a/test/nni_test/nnitest/naive_test.py b/test/nni_test/nnitest/naive_test.py index f70bc9a250..cedd8b4ad4 100644 --- a/test/nni_test/nnitest/naive_test.py +++ b/test/nni_test/nnitest/naive_test.py @@ -3,6 +3,7 @@ import sys import os.path as osp +import argparse import json import subprocess import sys @@ -12,19 +13,16 @@ from utils import is_experiment_done, get_experiment_id, 
get_nni_log_path, read_last_line, remove_files, setup_experiment, detect_port, snooze from utils import GREEN, RED, CLEAR, EXPERIMENT_URL -NAIVE_TEST_CONFIG_DIR = 'config/naive_test' +NNI_SOURCE_DIR = '..' +NAIVE_TEST_CONFIG_DIR = osp.join(NNI_SOURCE_DIR, 'test', 'config', 'naive_test') -def naive_test(): +def naive_test(args): '''run naive integration test''' to_remove = ['tuner_search_space.json', 'tuner_result.txt', 'assessor_result.txt'] to_remove = list(map(lambda file: osp.join(NAIVE_TEST_CONFIG_DIR, file), to_remove)) remove_files(to_remove) - if sys.platform == 'win32': - config_file = 'local_win32.yml' - else: - config_file = 'local.yml' - proc = subprocess.run(['nnictl', 'create', '--config', osp.join(NAIVE_TEST_CONFIG_DIR, config_file)]) + proc = subprocess.run(['nnictl', 'create', '--config', args.config]) assert proc.returncode == 0, '`nnictl create` failed with code %d' % proc.returncode print('Spawning trials...') @@ -75,8 +73,8 @@ def naive_test(): subprocess.run(['nnictl', 'stop']) snooze() -def stop_experiment_test(): - config_file = osp.join(NAIVE_TEST_CONFIG_DIR, 'local.yml') +def stop_experiment_test(args): + config_file = args.config '''Test `nnictl stop` command, including `nnictl stop exp_id` and `nnictl stop all`. 
Simple `nnictl stop` is not tested here since it is used in all other test code''' subprocess.run(['nnictl', 'create', '--config', config_file, '--port', '8080'], check=True) @@ -105,11 +103,14 @@ def stop_experiment_test(): if __name__ == '__main__': - installed = (sys.argv[-1] != '--preinstall') - setup_experiment(installed) + parser = argparse.ArgumentParser() + parser.add_argument("--config", type=str, required=True) + parser.add_argument("--preinstall", action='store_true') + args = parser.parse_args() + setup_experiment(not args.preinstall) try: - naive_test() - stop_experiment_test() + naive_test(args) + stop_experiment_test(args) # TODO: check the output of rest server print(GREEN + 'PASS' + CLEAR) except Exception as error: diff --git a/test/nni_test/nnitest/run_tests.py b/test/nni_test/nnitest/run_tests.py index 33c3edf198..a2c72c418e 100644 --- a/test/nni_test/nnitest/run_tests.py +++ b/test/nni_test/nnitest/run_tests.py @@ -221,7 +221,7 @@ def run(args): if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument("--config", type=str, required=True) - parser.add_argument("--nni_source_dir", type=str, default='../') + parser.add_argument("--nni_source_dir", type=str, default='..') parser.add_argument("--cases", type=str, default=None) parser.add_argument("--exclude", type=str, default=None) parser.add_argument("--ts", type=str, choices=['local', 'remote', 'pai', 'kubeflow', 'frameworkcontroller'], default='local') diff --git a/test/pipelines/pipelines-it-local-windows.yml b/test/pipelines/pipelines-it-local-windows.yml index eb347c9766..17736332b1 100644 --- a/test/pipelines/pipelines-it-local-windows.yml +++ b/test/pipelines/pipelines-it-local-windows.yml @@ -16,10 +16,6 @@ jobs: cd test powershell.exe -file scripts/unittest.ps1 displayName: 'unit test' - - script: | - cd test - python nni_test/nnitest/naive_test.py - displayName: 'Naive test' - script: | cd test python nni_test/nnitest/run_tests.py --config 
config/integration_tests.yml --ts local diff --git a/test/pipelines/pipelines-it-local.yml b/test/pipelines/pipelines-it-local.yml index e4ab5e39e6..941dcaedb8 100644 --- a/test/pipelines/pipelines-it-local.yml +++ b/test/pipelines/pipelines-it-local.yml @@ -22,10 +22,6 @@ jobs: cd test source scripts/unittest.sh displayName: 'Unit test' - - script: | - cd test - PATH=$HOME/.local/bin:$PATH python3 nni_test/nnitest/naive_test.py - displayName: 'Naive test' - script: | cd test PATH=$HOME/.local/bin:$PATH python3 nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts local From 88f2ebf6254ce438574f92121ea7e07375830bab Mon Sep 17 00:00:00 2001 From: chicm-ms <38930155+chicm-ms@users.noreply.github.com> Date: Sat, 21 Mar 2020 22:18:09 +0800 Subject: [PATCH 35/38] fix config path for win32 (#2212) --- test/nni_test/nnitest/run_tests.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/nni_test/nnitest/run_tests.py b/test/nni_test/nnitest/run_tests.py index a2c72c418e..054991c1c5 100644 --- a/test/nni_test/nnitest/run_tests.py +++ b/test/nni_test/nnitest/run_tests.py @@ -40,7 +40,7 @@ def update_training_service_config(config, training_service): deep_update(config, it_ts_config[training_service]) def prepare_config_file(test_case_config, it_config, args): - config_path = os.path.join(args.nni_source_dir, test_case_config['configFile']) + config_path = args.nni_source_dir + test_case_config['configFile'] test_yml_config = get_yml_content(config_path) # apply test case specific config @@ -221,7 +221,7 @@ def run(args): if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument("--config", type=str, required=True) - parser.add_argument("--nni_source_dir", type=str, default='..') + parser.add_argument("--nni_source_dir", type=str, default='../') parser.add_argument("--cases", type=str, default=None) parser.add_argument("--exclude", type=str, default=None) parser.add_argument("--ts", type=str, choices=['local', 
'remote', 'pai', 'kubeflow', 'frameworkcontroller'], default='local') From d0168b0bb078441e94327990f02562dc500d84bd Mon Sep 17 00:00:00 2001 From: chicm-ms <38930155+chicm-ms@users.noreply.github.com> Date: Sat, 21 Mar 2020 22:52:12 +0800 Subject: [PATCH 36/38] Fix test case trial concurrency (#2213) --- test/config/pr_tests.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/config/pr_tests.yml b/test/config/pr_tests.yml index 86040d1f92..acfa6ef13e 100644 --- a/test/config/pr_tests.yml +++ b/test/config/pr_tests.yml @@ -67,9 +67,15 @@ testCases: ######################################################################### - name: assessor-curvefitting configFile: test/config/assessors/curvefitting.yml + config: + maxTrialNum: 2 + trialConcurrency: 2 - name: assessor-medianstop configFile: test/config/assessors/medianstop.yml + config: + maxTrialNum: 2 + trialConcurrency: 2 ######################################################################### # nni tuners test From 9858637d1dda96083fcac4bfa4d20054b2c261ce Mon Sep 17 00:00:00 2001 From: chicm-ms <38930155+chicm-ms@users.noreply.github.com> Date: Sat, 21 Mar 2020 23:22:54 +0800 Subject: [PATCH 37/38] update test cases (#2214) --- test/config/pr_tests.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/test/config/pr_tests.yml b/test/config/pr_tests.yml index acfa6ef13e..365b038d0b 100644 --- a/test/config/pr_tests.yml +++ b/test/config/pr_tests.yml @@ -16,6 +16,7 @@ testCases: launchCommand: python3 nni_test/nnitest/naive_test.py --config $configFile experimentStatusCheck: False stopCommand: + platform: linux darwin ######################################################################### # nni features test From 34252b3ee36f9242172f3488296d090babf4ea5e Mon Sep 17 00:00:00 2001 From: chicm-ms <38930155+chicm-ms@users.noreply.github.com> Date: Tue, 24 Mar 2020 01:05:41 +0800 Subject: [PATCH 38/38] clean up (#2227) --- test/scripts/model_compression.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git 
a/test/scripts/model_compression.sh b/test/scripts/model_compression.sh index 969b100e94..c3a7754fb5 100644 --- a/test/scripts/model_compression.sh +++ b/test/scripts/model_compression.sh @@ -41,3 +41,5 @@ echo "===========================Testing: quantizers===========================" #echo "testing BNN quantizer..." #python3 BNN_quantizer_cifar10.py + +rm -rf ./checkpoints/*