From bf2b92909c70c364369bbcf1a77db442f571d34e Mon Sep 17 00:00:00 2001 From: SparkSnail Date: Wed, 8 Jan 2020 14:58:52 +0800 Subject: [PATCH] Support monitor mode when creating or resuming a new experiment (#1933) --- docs/en_US/Tutorial/Nnictl.md | 2 ++ tools/bash-completion | 4 ++-- tools/nni_cmd/launcher.py | 6 ++++-- tools/nni_cmd/nnictl.py | 2 ++ tools/nni_cmd/nnictl_utils.py | 36 ++++++++++++++++++++++++++++------- 5 files changed, 39 insertions(+), 11 deletions(-) diff --git a/docs/en_US/Tutorial/Nnictl.md b/docs/en_US/Tutorial/Nnictl.md index 83c81f7145..b58d4c4a37 100644 --- a/docs/en_US/Tutorial/Nnictl.md +++ b/docs/en_US/Tutorial/Nnictl.md @@ -49,6 +49,7 @@ nnictl support commands: |--config, -c| True| |YAML configure file of the experiment| |--port, -p|False| |the port of restful server| |--debug, -d|False||set debug mode| + |--watch, -w|False||set watch mode| * Examples @@ -97,6 +98,7 @@ Debug mode will disable version check function in Trialkeeper. |id| True| |The id of the experiment you want to resume| |--port, -p| False| |Rest port of the experiment you want to resume| |--debug, -d|False||set debug mode| + |--watch, -w|False||set watch mode| * Example diff --git a/tools/bash-completion b/tools/bash-completion index 86283d6ec7..031f9b3476 100644 --- a/tools/bash-completion +++ b/tools/bash-completion @@ -1,7 +1,7 @@ # list of commands/arguments __nnictl_cmds="create resume view update stop trial experiment platform import export webui config log package tensorboard top" -__nnictl_create_cmds="--config --port --debug" -__nnictl_resume_cmds="--port --debug" +__nnictl_create_cmds="--config --port --debug --watch" +__nnictl_resume_cmds="--port --debug --watch" __nnictl_view_cmds="--port" __nnictl_update_cmds="searchspace concurrency duration trialnum" __nnictl_update_searchspace_cmds="--filename" diff --git a/tools/nni_cmd/launcher.py b/tools/nni_cmd/launcher.py index 54ce4aec77..5d406a0ae3 100644 --- a/tools/nni_cmd/launcher.py +++ b/tools/nni_cmd/launcher.py @@ -20,7 +20,7 @@ detect_port, get_user, get_python_dir from .constants import NNICTL_HOME_DIR, ERROR_INFO, REST_TIME_OUT, EXPERIMENT_SUCCESS_INFO, LOG_HEADER, PACKAGE_REQUIREMENTS from .command_utils import check_output_command, kill_command -from .nnictl_utils import update_experiment +from .nnictl_utils import update_experiment, set_monitor def get_log_path(config_file_name): '''generate stdout and stderr log path''' @@ -493,6 +493,8 @@ def launch_experiment(args, experiment_config, mode, config_file_name, experimen experiment_config['experimentName']) print_normal(EXPERIMENT_SUCCESS_INFO % (experiment_id, ' '.join(web_ui_url_list))) + if args.watch: + set_monitor(True, 3, args.port, rest_process.pid) def create_experiment(args): '''start a new experiment''' @@ -506,8 +508,8 @@ def create_experiment(args): validate_all_content(experiment_config, config_path) nni_config.set_config('experimentConfig', experiment_config) - launch_experiment(args, experiment_config, 'new', config_file_name) nni_config.set_config('restServerPort', args.port) + launch_experiment(args, experiment_config, 'new', config_file_name) def manage_stopped_experiment(args, mode): '''view a stopped experiment''' diff --git a/tools/nni_cmd/nnictl.py b/tools/nni_cmd/nnictl.py index ab8e4153ca..856bd2adc8 100644 --- a/tools/nni_cmd/nnictl.py +++ b/tools/nni_cmd/nnictl.py @@ -51,6 +51,7 @@ def parse_args(): parser_start.add_argument('--config', '-c', required=True, dest='config', help='the path of yaml config file') parser_start.add_argument('--port', '-p', default=DEFAULT_REST_PORT, dest='port', help='the port of restful server') parser_start.add_argument('--debug', '-d', action='store_true', help=' set debug mode') + parser_start.add_argument('--watch', '-w', action='store_true', help=' set watch mode') parser_start.set_defaults(func=create_experiment) # parse resume command @@ -58,6 +59,7 @@ def parse_args(): parser_resume.add_argument('id', nargs='?', help='The id of the experiment you want to resume') parser_resume.add_argument('--port', '-p', default=DEFAULT_REST_PORT, dest='port', help='the port of restful server') parser_resume.add_argument('--debug', '-d', action='store_true', help=' set debug mode') + parser_resume.add_argument('--watch', '-w', action='store_true', help=' set watch mode') parser_resume.set_defaults(func=resume_experiment) # parse view command diff --git a/tools/nni_cmd/nnictl_utils.py b/tools/nni_cmd/nnictl_utils.py index 3fad9c2599..a66197fac9 100644 --- a/tools/nni_cmd/nnictl_utils.py +++ b/tools/nni_cmd/nnictl_utils.py @@ -3,6 +3,7 @@ import csv import os +import sys import json import time import re @@ -623,23 +624,44 @@ def show_experiment_info(): content[index].get('endTime'), content[index].get('status'))) print(TRIAL_MONITOR_TAIL) -def monitor_experiment(args): - '''monitor the experiment''' - if args.time <= 0: - print_error('please input a positive integer as time interval, the unit is second.') - exit(1) +def set_monitor(auto_exit, time_interval, port=None, pid=None): + '''set the experiment monitor engine''' while True: try: - os.system('clear') + if sys.platform == 'win32': + os.system('cls') + else: + os.system('clear') update_experiment() show_experiment_info() - time.sleep(args.time) + if auto_exit: + status = get_experiment_status(port) + if status in ['DONE', 'ERROR', 'STOPPED']: + print_normal('Experiment status is {0}.'.format(status)) + print_normal('Stopping experiment...') + kill_command(pid) + print_normal('Stop experiment success.') + exit(0) + time.sleep(time_interval) except KeyboardInterrupt: + if auto_exit: + print_normal('Stopping experiment...') + kill_command(pid) + print_normal('Stop experiment success.') + else: + print_normal('Exiting...') exit(0) except Exception as exception: print_error(exception) exit(1) +def monitor_experiment(args): + '''monitor the experiment''' + if args.time <= 0: + print_error('please input a positive integer as time interval, the unit is second.') + exit(1) + set_monitor(False, args.time) + def export_trials_data(args): '''export experiment metadata to csv '''