Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support monitor mode when creating or resuming a new experiment (#1933) #226

Merged
merged 1 commit into from
Jan 8, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/en_US/Tutorial/Nnictl.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ nnictl support commands:
|--config, -c| True| |YAML configure file of the experiment|
|--port, -p|False| |the port of restful server|
|--debug, -d|False||set debug mode|
|--watch, -w|False||set watch mode|

* Examples

Expand Down Expand Up @@ -97,6 +98,7 @@ Debug mode will disable version check function in Trialkeeper.
|id| True| |The id of the experiment you want to resume|
|--port, -p| False| |Rest port of the experiment you want to resume|
|--debug, -d|False||set debug mode|
|--watch, -w|False||set watch mode|

* Example

Expand Down
4 changes: 2 additions & 2 deletions tools/bash-completion
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# list of commands/arguments
__nnictl_cmds="create resume view update stop trial experiment platform import export webui config log package tensorboard top"
__nnictl_create_cmds="--config --port --debug"
__nnictl_resume_cmds="--port --debug"
__nnictl_create_cmds="--config --port --debug --watch"
__nnictl_resume_cmds="--port --debug --watch"
__nnictl_view_cmds="--port"
__nnictl_update_cmds="searchspace concurrency duration trialnum"
__nnictl_update_searchspace_cmds="--filename"
Expand Down
6 changes: 4 additions & 2 deletions tools/nni_cmd/launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
detect_port, get_user, get_python_dir
from .constants import NNICTL_HOME_DIR, ERROR_INFO, REST_TIME_OUT, EXPERIMENT_SUCCESS_INFO, LOG_HEADER, PACKAGE_REQUIREMENTS
from .command_utils import check_output_command, kill_command
from .nnictl_utils import update_experiment
from .nnictl_utils import update_experiment, set_monitor

def get_log_path(config_file_name):
'''generate stdout and stderr log path'''
Expand Down Expand Up @@ -493,6 +493,8 @@ def launch_experiment(args, experiment_config, mode, config_file_name, experimen
experiment_config['experimentName'])

print_normal(EXPERIMENT_SUCCESS_INFO % (experiment_id, ' '.join(web_ui_url_list)))
if args.watch:
set_monitor(True, 3, args.port, rest_process.pid)

def create_experiment(args):
'''start a new experiment'''
Expand All @@ -506,8 +508,8 @@ def create_experiment(args):
validate_all_content(experiment_config, config_path)

nni_config.set_config('experimentConfig', experiment_config)
launch_experiment(args, experiment_config, 'new', config_file_name)
nni_config.set_config('restServerPort', args.port)
launch_experiment(args, experiment_config, 'new', config_file_name)

def manage_stopped_experiment(args, mode):
'''view a stopped experiment'''
Expand Down
2 changes: 2 additions & 0 deletions tools/nni_cmd/nnictl.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,13 +51,15 @@ def parse_args():
parser_start.add_argument('--config', '-c', required=True, dest='config', help='the path of yaml config file')
parser_start.add_argument('--port', '-p', default=DEFAULT_REST_PORT, dest='port', help='the port of restful server')
parser_start.add_argument('--debug', '-d', action='store_true', help=' set debug mode')
parser_start.add_argument('--watch', '-w', action='store_true', help=' set watch mode')
parser_start.set_defaults(func=create_experiment)

# parse resume command
parser_resume = subparsers.add_parser('resume', help='resume a new experiment')
parser_resume.add_argument('id', nargs='?', help='The id of the experiment you want to resume')
parser_resume.add_argument('--port', '-p', default=DEFAULT_REST_PORT, dest='port', help='the port of restful server')
parser_resume.add_argument('--debug', '-d', action='store_true', help=' set debug mode')
parser_resume.add_argument('--watch', '-w', action='store_true', help=' set watch mode')
parser_resume.set_defaults(func=resume_experiment)

# parse view command
Expand Down
36 changes: 29 additions & 7 deletions tools/nni_cmd/nnictl_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import csv
import os
import sys
import json
import time
import re
Expand Down Expand Up @@ -623,23 +624,44 @@ def show_experiment_info():
content[index].get('endTime'), content[index].get('status')))
print(TRIAL_MONITOR_TAIL)

def monitor_experiment(args):
'''monitor the experiment'''
if args.time <= 0:
print_error('please input a positive integer as time interval, the unit is second.')
exit(1)
def set_monitor(auto_exit, time_interval, port=None, pid=None):
'''set the experiment monitor engine'''
while True:
try:
os.system('clear')
if sys.platform == 'win32':
os.system('cls')
else:
os.system('clear')
update_experiment()
show_experiment_info()
time.sleep(args.time)
if auto_exit:
status = get_experiment_status(port)
if status in ['DONE', 'ERROR', 'STOPPED']:
print_normal('Experiment status is {0}.'.format(status))
print_normal('Stopping experiment...')
kill_command(pid)
print_normal('Stop experiment success.')
exit(0)
time.sleep(time_interval)
except KeyboardInterrupt:
if auto_exit:
print_normal('Stopping experiment...')
kill_command(pid)
print_normal('Stop experiment success.')
else:
print_normal('Exiting...')
exit(0)
except Exception as exception:
print_error(exception)
exit(1)

def monitor_experiment(args):
'''monitor the experiment'''
if args.time <= 0:
print_error('please input a positive integer as time interval, the unit is second.')
exit(1)
set_monitor(False, args.time)

def export_trials_data(args):
'''export experiment metadata to csv
'''
Expand Down