Skip to content
This repository has been archived by the owner on Sep 18, 2024. It is now read-only.

modify run/resume/view api & fix logDir rewrite #3545

Merged
merged 4 commits into from
Apr 21, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 35 additions & 28 deletions nni/experiment/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def __init__(self, config: ExperimentConfig) -> None:
"""
Prepare an experiment.

Use `Experiment.start()` to launch it.
Use `Experiment.run()` to launch it.

Parameters
----------
Expand All @@ -60,7 +60,7 @@ def __init__(self, training_service: Union[str, List[str]]) -> None:
experiment.config.trial_command = 'python3 trial.py'
experiment.config.machines.append(RemoteMachineConfig(ip=..., user_name=...))
...
experiment.start(8080)
experiment.run(8080)

Parameters
----------
Expand Down Expand Up @@ -149,27 +149,30 @@ def stop(self) -> None:
self._proc = None
_logger.info('Experiment stopped')

def run(self, port: int = 8080, debug: bool = False) -> bool:
def run(self, port: int = 8080, wait_completion: bool = True, debug: bool = False) -> bool:
"""
Run the experiment.

This function will block until experiment finish or error.
If wait_completion is True, this function will block until the experiment finishes or fails.

Return `True` when the experiment is done; return `False` when the experiment failed.

If wait_completion is False, this function does not block and returns None immediately.
"""
self.start(port, debug)
try:
while True:
time.sleep(10)
status = self.get_status()
if status == 'DONE' or status == 'STOPPED':
return True
if status == 'ERROR':
return False
except KeyboardInterrupt:
_logger.warning('KeyboardInterrupt detected')
finally:
self.stop()
if wait_completion:
try:
while True:
time.sleep(10)
status = self.get_status()
if status == 'DONE' or status == 'STOPPED':
return True
if status == 'ERROR':
return False
except KeyboardInterrupt:
_logger.warning('KeyboardInterrupt detected')
finally:
self.stop()

@classmethod
def connect(cls, port: int):
Expand All @@ -194,7 +197,7 @@ def connect(cls, port: int):
return experiment

@classmethod
def resume(cls, experiment_id: str, port: int, wait_completion: bool = True, debug: bool = False):
def resume(cls, experiment_id: str, port: int = 8080, wait_completion: bool = True, debug: bool = False):
"""
Resume a stopped experiment.

Expand All @@ -212,14 +215,12 @@ def resume(cls, experiment_id: str, port: int, wait_completion: bool = True, deb
experiment = Experiment()
experiment.id = experiment_id
experiment.mode = 'resume'
if wait_completion:
experiment.run(port, debug)
else:
experiment.start(port, debug)
experiment.run(port=port, wait_completion=wait_completion, debug=debug)
if not wait_completion:
return experiment

@classmethod
def view(cls, experiment_id: str, port: int, wait_completion: bool = True):
def view(cls, experiment_id: str, port: int = 8080, non_blocking: bool = False):
"""
View a stopped experiment.

Expand All @@ -229,18 +230,24 @@ def view(cls, experiment_id: str, port: int, wait_completion: bool = True):
The stopped experiment id.
port
The port of web UI.
wait_completion
If true, run in the foreground. If false, run in the background.
non_blocking
If false, run in the foreground. If true, run in the background.
"""
debug = False
experiment = Experiment()
experiment.id = experiment_id
experiment.mode = 'view'
if wait_completion:
experiment.run(port, debug)
else:
experiment.start(port, debug)
experiment.start(port=port, debug=debug)
if non_blocking:
return experiment
else:
try:
while True:
time.sleep(10)
except KeyboardInterrupt:
_logger.warning('KeyboardInterrupt detected')
finally:
experiment.stop()

def get_status(self) -> str:
"""
Expand Down
7 changes: 6 additions & 1 deletion nni/tools/nnictl/launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,10 @@ def launch_experiment(args, experiment_config, mode, experiment_id, config_versi
if package_name in ['SMAC', 'BOHB', 'PPOTuner']:
print_error(f'The dependencies for {package_name} can be installed through pip install nni[{package_name}]')
raise
log_dir = experiment_config['logDir'] if experiment_config.get('logDir') else NNI_HOME_DIR
if config_version == 1:
log_dir = experiment_config['logDir'] if experiment_config.get('logDir') else NNI_HOME_DIR
else:
log_dir = experiment_config['experimentWorkingDirectory'] if experiment_config.get('experimentWorkingDirectory') else NNI_HOME_DIR
log_level = experiment_config['logLevel'] if experiment_config.get('logLevel') else None
# View mode does not need the debug function: when viewing an experiment, no new logs are created.
foreground = False
Expand Down Expand Up @@ -486,8 +489,10 @@ def manage_stopped_experiment(args, mode):
assert 'trainingService' in experiment_config or 'trainingServicePlatform' in experiment_config
try:
if 'trainingService' in experiment_config:
experiment_config['experimentWorkingDirectory'] = experiments_dict[args.id]['logDir']
launch_experiment(args, experiment_config, mode, experiment_id, 2)
else:
experiment_config['logDir'] = experiments_dict[args.id]['logDir']
launch_experiment(args, experiment_config, mode, experiment_id, 1)
except Exception as exception:
restServerPid = Experiments().get_all_experiments().get(experiment_id, {}).get('pid')
Expand Down
2 changes: 1 addition & 1 deletion ts/nni_manager/common/manager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import { TrialJobStatus, LogType } from './trainingService';
import { ExperimentConfig } from './experimentConfig';

type ProfileUpdateType = 'TRIAL_CONCURRENCY' | 'MAX_EXEC_DURATION' | 'SEARCH_SPACE' | 'MAX_TRIAL_NUM';
type ExperimentStatus = 'INITIALIZED' | 'RUNNING' | 'ERROR' | 'STOPPING' | 'STOPPED' | 'DONE' | 'NO_MORE_TRIAL' | 'TUNER_NO_MORE_TRIAL';
type ExperimentStatus = 'INITIALIZED' | 'RUNNING' | 'ERROR' | 'STOPPING' | 'STOPPED' | 'DONE' | 'NO_MORE_TRIAL' | 'TUNER_NO_MORE_TRIAL' | 'VIEWED';
namespace ExperimentStartUpMode {
export const NEW = 'new';
export const RESUME = 'resume';
Expand Down
1 change: 1 addition & 0 deletions ts/nni_manager/core/nnimanager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,7 @@ class NNIManager implements Manager {
this.experimentProfile = await this.dataStore.getExperimentProfile(experimentId);
this.readonly = readonly;
if (readonly) {
this.setStatus('VIEWED');
return Promise.resolve();
}

Expand Down
2 changes: 1 addition & 1 deletion ts/webui/src/App.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ class App extends React.Component<{}, AppState> {
}

// experiment status and /trial-jobs api's status could decide website update
if (['DONE', 'ERROR', 'STOPPED'].includes(EXPERIMENT.status) || TRIALS.jobListError()) {
if (['DONE', 'ERROR', 'STOPPED', 'VIEWED'].includes(EXPERIMENT.status) || TRIALS.jobListError()) {
// experiment finished, refresh once more to ensure consistency
this.setState(() => ({ interval: 0, isUpdate: false }));
return;
Expand Down
2 changes: 1 addition & 1 deletion ts/webui/src/components/modals/ExperimentSummaryPanel.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ class ExperimentSummaryPanel extends React.Component<ExpDrawerProps, ExpDrawerSt
this.setState({ experiment: JSON.stringify(result, null, 4) });
}

if (['DONE', 'ERROR', 'STOPPED'].includes(EXPERIMENT.status)) {
if (['DONE', 'ERROR', 'STOPPED', 'VIEWED'].includes(EXPERIMENT.status)) {
if (this.refreshId !== null || this.refreshId !== undefined) {
window.clearInterval(this.refreshId);
}
Expand Down
2 changes: 1 addition & 1 deletion ts/webui/src/components/trial-detail/TableList.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,7 @@ class TableList extends React.Component<TableListProps, TableListState> {

private _renderOperationColumn(record: any): React.ReactNode {
const runningTrial: boolean = ['RUNNING', 'UNKNOWN'].includes(record.status) ? false : true;
const disabledAddCustomizedTrial = ['DONE', 'ERROR', 'STOPPED'].includes(EXPERIMENT.status);
const disabledAddCustomizedTrial = ['DONE', 'ERROR', 'STOPPED', 'VIEWED'].includes(EXPERIMENT.status);
return (
<Stack className='detail-button' horizontal>
<PrimaryButton
Expand Down
1 change: 1 addition & 0 deletions ts/webui/src/static/const.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ const EXPERIMENTSTATUS = [
'ERROR',
'STOPPING',
'STOPPED',
'VIEWED',
'DONE',
'NO_MORE_TRIAL',
'TUNER_NO_MORE_TRIAL'
Expand Down
6 changes: 4 additions & 2 deletions ts/webui/src/static/style/overview/probar.scss
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ $error: #a4262c;
}

.DONE,
.STOPPED {
.STOPPED,
.VIEWED {
color: $done;

.ms-ProgressIndicator-progressBar {
Expand All @@ -37,7 +38,8 @@ $error: #a4262c;

.bestMetric {
.DONE,
.STOPPED {
.STOPPED,
.VIEWED {
color: $done;
}

Expand Down