Skip to content
This repository has been archived by the owner on Sep 18, 2024. It is now read-only.

Support experiment view #1524

Merged
merged 28 commits into from
Sep 26, 2019
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions docs/en_US/Tutorial/Nnictl.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ nnictl support commands:

* [nnictl create](#create)
* [nnictl resume](#resume)
* [nnictl view](#view)
* [nnictl stop](#stop)
* [nnictl update](#update)
* [nnictl trial](#trial)
Expand Down Expand Up @@ -104,6 +105,35 @@ Debug mode will disable version check function in Trialkeeper.
nnictl resume [experiment_id] --port 8088
```

<a name="view"></a>

![](https://placehold.it/15/1589F0/000000?text=+) `nnictl view`

* Description

You can use this command to view a stopped experiment.

* Usage

```bash
nnictl view [OPTIONS]
```

* Options

|Name, shorthand|Required|Default|Description|
|------|------|------ |------|
|id| True| |The ID of the experiment you want to view|
|--port, -p| False| |REST port of the experiment you want to view|

* Example

> View an experiment on the specified port 8088

```bash
nnictl view [experiment_id] --port 8088
```

<a name="stop"></a>
![](https://placehold.it/15/1589F0/000000?text=+) `nnictl stop`

Expand Down
21 changes: 10 additions & 11 deletions src/nni_manager/common/experimentStartupInfo.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,18 +27,17 @@ import * as component from '../common/component';
@component.Singleton
class ExperimentStartupInfo {
private experimentId: string = '';
private newExperiment: boolean = true;
private experimentMode: string = '';
private basePort: number = -1;
private initialized: boolean = false;
private initTrialSequenceID: number = 0;
private logDir: string = '';
private logLevel: string = '';

public setStartupInfo(newExperiment: boolean, experimentId: string, basePort: number, logDir?: string, logLevel?: string): void {
public setStartupInfo(experimentMode: string, experimentId: string, basePort: number, logDir?: string, logLevel?: string): void {
assert(!this.initialized);
assert(experimentId.trim().length > 0);

this.newExperiment = newExperiment;
this.experimentMode = experimentMode;
this.experimentId = experimentId;
this.basePort = basePort;
this.initialized = true;
Expand Down Expand Up @@ -66,10 +65,10 @@ class ExperimentStartupInfo {
return this.basePort;
}

public isNewExperiment(): boolean {
public getExperimentMode(): string {
assert(this.initialized);

return this.newExperiment;
return this.experimentMode;
}

public getLogDir(): string {
Expand Down Expand Up @@ -104,8 +103,8 @@ function getBasePort(): number {
return component.get<ExperimentStartupInfo>(ExperimentStartupInfo).getBasePort();
}

function isNewExperiment(): boolean {
return component.get<ExperimentStartupInfo>(ExperimentStartupInfo).isNewExperiment();
function getExperimentMode(): string {
return component.get<ExperimentStartupInfo>(ExperimentStartupInfo).getExperimentMode();
}

function setInitTrialSequenceId(initSequenceId: number): void {
Expand All @@ -121,10 +120,10 @@ function getExperimentStartupInfo(): ExperimentStartupInfo {
}

function setExperimentStartupInfo(
newExperiment: boolean, experimentId: string, basePort: number, logDir?: string, logLevel?: string): void {
experimentMode: string, experimentId: string, basePort: number, logDir?: string, logLevel?: string): void {
component.get<ExperimentStartupInfo>(ExperimentStartupInfo)
.setStartupInfo(newExperiment, experimentId, basePort, logDir, logLevel);
.setStartupInfo(experimentMode, experimentId, basePort, logDir, logLevel);
}

export { ExperimentStartupInfo, getBasePort, getExperimentId, isNewExperiment, getExperimentStartupInfo,
export { ExperimentStartupInfo, getBasePort, getExperimentId, getExperimentMode, getExperimentStartupInfo,
setExperimentStartupInfo, setInitTrialSequenceId, getInitTrialSequenceId };
10 changes: 8 additions & 2 deletions src/nni_manager/common/manager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,12 @@ import { MetricDataRecord, MetricType, TrialJobInfo } from './datastore';
import { TrialJobStatus } from './trainingService';

type ProfileUpdateType = 'TRIAL_CONCURRENCY' | 'MAX_EXEC_DURATION' | 'SEARCH_SPACE' | 'MAX_TRIAL_NUM';
type ExperimentStatus = 'INITIALIZED' | 'RUNNING' | 'ERROR' | 'STOPPING' | 'STOPPED' | 'DONE' | 'NO_MORE_TRIAL' | 'TUNER_NO_MORE_TRIAL';
type ExperimentStatus = 'INITIALIZED' | 'RUNNING' | 'ERROR' | 'STOPPING' | 'STOPPED' | 'DONE' | 'NO_MORE_TRIAL' | 'TUNER_NO_MORE_TRIAL' | 'VIEWING';
namespace ExperimentStartUpMode {
export const NEW = 'new';
export const RESUME = 'resume';
export const VIEW = 'view';
}

interface ExperimentParams {
authorName: string;
Expand Down Expand Up @@ -97,6 +102,7 @@ abstract class Manager {
public abstract startExperiment(experimentParams: ExperimentParams): Promise<string>;
public abstract resumeExperiment(): Promise<void>;
public abstract stopExperiment(): Promise<void>;
public abstract viewExperiment(): Promise<void>;
public abstract getExperimentProfile(): Promise<ExperimentProfile>;
public abstract updateExperimentProfile(experimentProfile: ExperimentProfile, updateType: ProfileUpdateType): Promise<void>;
public abstract importData(data: string): Promise<void>;
Expand All @@ -115,4 +121,4 @@ abstract class Manager {
public abstract getStatus(): NNIManagerStatus;
}

export { Manager, ExperimentParams, ExperimentProfile, TrialJobStatistics, ProfileUpdateType, NNIManagerStatus, ExperimentStatus };
export { Manager, ExperimentParams, ExperimentProfile, TrialJobStatistics, ProfileUpdateType, NNIManagerStatus, ExperimentStatus, ExperimentStartUpMode };
4 changes: 2 additions & 2 deletions src/nni_manager/common/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ import * as util from 'util';

import { Database, DataStore } from './datastore';
import { ExperimentStartupInfo, getExperimentId, getExperimentStartupInfo, setExperimentStartupInfo } from './experimentStartupInfo';
import { Manager } from './manager';
import { Manager, ExperimentStartUpMode } from './manager';
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this import necessary? it seems not used.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

removed.

import { TrialConfig } from '../training_service/common/trialConfig';
import { HyperParameters, TrainingService, TrialJobStatus } from './trainingService';
import { getLogger } from './log';
Expand Down Expand Up @@ -276,7 +276,7 @@ function prepareUnitTest(): void {
Container.snapshot(TrainingService);
Container.snapshot(Manager);

setExperimentStartupInfo(true, 'unittest', 8080);
setExperimentStartupInfo(ExperimentStartUpMode.NEW, 'unittest', 8080);
mkDirPSync(getLogDir());

const sqliteFile: string = path.join(getDefaultDatabaseDir(), 'nni.sqlite');
Expand Down
6 changes: 3 additions & 3 deletions src/nni_manager/core/nniDataStore.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ import { Database, DataStore, MetricData, MetricDataRecord, MetricType,
TrialJobEvent, TrialJobEventRecord, TrialJobInfo, HyperParameterFormat,
ExportedDataFormat } from '../common/datastore';
import { NNIError } from '../common/errors';
import { getExperimentId, isNewExperiment } from '../common/experimentStartupInfo';
import { getExperimentId, getExperimentMode } from '../common/experimentStartupInfo';
import { getLogger, Logger } from '../common/log';
import { ExperimentProfile, TrialJobStatistics } from '../common/manager';
import { ExperimentProfile, TrialJobStatistics, ExperimentStartUpMode } from '../common/manager';
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems the new import not used.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

removed.

import { TrialJobDetail, TrialJobStatus } from '../common/trainingService';
import { getDefaultDatabaseDir, mkDirP } from '../common/utils';

Expand All @@ -47,7 +47,7 @@ class NNIDataStore implements DataStore {

// TODO support specify database dir
const databaseDir: string = getDefaultDatabaseDir();
if(isNewExperiment()) {
if(getExperimentMode() === ExperimentStartUpMode.NEW) {
mkDirP(databaseDir).then(() => {
this.db.init(true, databaseDir).then(() => {
this.log.info('Datastore initialization done');
Expand Down
7 changes: 6 additions & 1 deletion src/nni_manager/core/nnimanager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -180,13 +180,18 @@ class NNIManager implements Manager {
return this.experimentProfile.id;
}

public async viewExperiment(): Promise<void> {
this.log.info(`Viewing experiment: ${this.experimentProfile.id}`);
this.experimentProfile = await this.dataStore.getExperimentProfile(getExperimentId());
this.setStatus('VIEWING');
QuanluZhang marked this conversation as resolved.
Show resolved Hide resolved
}

public async resumeExperiment(): Promise<void> {
this.log.info(`Resuming experiment: ${this.experimentProfile.id}`);
//Fetch back the experiment profile
const experimentId: string = getExperimentId();
this.experimentProfile = await this.dataStore.getExperimentProfile(experimentId);
const expParams: ExperimentParams = this.experimentProfile.params;

setInitTrialSequenceId(this.experimentProfile.maxSequenceId + 1);

// Set up multiphase config
Expand Down
19 changes: 9 additions & 10 deletions src/nni_manager/main.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ import { Container, Scope } from 'typescript-ioc';
import * as fs from 'fs';
import * as component from './common/component';
import { Database, DataStore } from './common/datastore';
import { setExperimentStartupInfo } from './common/experimentStartupInfo';
import { setExperimentStartupInfo, getExperimentMode } from './common/experimentStartupInfo';
import { getLogger, Logger, logLevelNameMap } from './common/log';
import { Manager } from './common/manager';
import { Manager, ExperimentStartUpMode } from './common/manager';
import { TrainingService } from './common/trainingService';
import { getLogDir, mkDirP, parseArg, uniqueString } from './common/utils';
import { NNIDataStore } from './core/nniDataStore';
Expand All @@ -42,11 +42,10 @@ import {
} from './training_service/remote_machine/remoteMachineTrainingService';

function initStartupInfo(
startExpMode: string, resumeExperimentId: string, basePort: number,
startExpMode: string, experimentId: string, basePort: number,
logDirectory: string, experimentLogLevel: string): void {
const createNew: boolean = (startExpMode === 'new');
const expId: string = createNew ? uniqueString(8) : resumeExperimentId;
setExperimentStartupInfo(createNew, expId, basePort, logDirectory, experimentLogLevel);
const expId: string = startExpMode === ExperimentStartUpMode.NEW ? uniqueString(8) : experimentId;
setExperimentStartupInfo(startExpMode, expId, basePort, logDirectory, experimentLogLevel);
}

async function initContainer(platformMode: string): Promise<void> {
Expand Down Expand Up @@ -89,7 +88,7 @@ async function initContainer(platformMode: string): Promise<void> {

function usage(): void {
console.info('usage: node main.js --port <port> --mode \
<local/remote/pai/kubeflow/frameworkcontroller> --start_mode <new/resume> --experiment_id <id>');
<local/remote/pai/kubeflow/frameworkcontroller> --start_mode <new/resume/view> --experiment_id <id>');
}

const strPort: string = parseArg(['--port', '-p']);
Expand All @@ -108,15 +107,15 @@ if (!['local', 'remote', 'pai', 'kubeflow', 'frameworkcontroller'].includes(mode
}

const startMode: string = parseArg(['--start_mode', '-s']);
if (!['new', 'resume'].includes(startMode)) {
if (![ExperimentStartUpMode.NEW, ExperimentStartUpMode.RESUME, ExperimentStartUpMode.VIEW].includes(startMode)) {
console.log(`FATAL: unknown start_mode: ${startMode}`);
usage();
process.exit(1);
}

const experimentId: string = parseArg(['--experiment_id', '-id']);
if (startMode === 'resume' && experimentId.trim().length < 1) {
console.log(`FATAL: cannot resume experiment, invalid experiment_id: ${experimentId}`);
if ((startMode === ExperimentStartUpMode.RESUME || startMode === ExperimentStartUpMode.VIEW) && experimentId.trim().length < 1) {
console.log(`FATAL: cannot resume or view the experiment, invalid experiment_id: ${experimentId}`);
usage();
process.exit(1);
}
Expand Down
96 changes: 67 additions & 29 deletions src/nni_manager/rest_server/restHandler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ import * as path from 'path';
import * as component from '../common/component';
import { DataStore, MetricDataRecord, TrialJobInfo } from '../common/datastore';
import { NNIError, NNIErrorNames } from '../common/errors';
import { isNewExperiment } from '../common/experimentStartupInfo';
import { getExperimentMode } from '../common/experimentStartupInfo';
import { getLogger, Logger } from '../common/log';
import { ExperimentProfile, Manager, TrialJobStatistics} from '../common/manager';
import { ExperimentProfile, Manager, TrialJobStatistics, ExperimentStartUpMode } from '../common/manager';
import { ValidationSchemas } from './restValidationSchemas';
import { NNIRestServer } from './nniRestServer';
import { getVersion } from '../common/utils';
Expand Down Expand Up @@ -139,11 +139,18 @@ class NNIRestHandler {

private updateExperimentProfile(router: Router): void {
router.put('/experiment', expressJoi(ValidationSchemas.UPDATEEXPERIMENT), (req: Request, res: Response) => {
this.nniManager.updateExperimentProfile(req.body, req.query.update_type).then(() => {
res.send();
}).catch((err: Error) => {
this.handle_error(err, res);
});
let experimentMode: string = getExperimentMode();
if( experimentMode !== ExperimentStartUpMode.VIEW) {
this.nniManager.updateExperimentProfile(req.body, req.query.update_type).then(() => {
res.send();
}).catch((err: Error) => {
this.handle_error(err, res);
});
} else {
let message = `Could not update experiment in view mode!`;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@lvybriage could webui correctly handle this message?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed, send 400 code back.

this.log.warning(message);
res.send(message);
}
});
}

Expand All @@ -159,7 +166,8 @@ class NNIRestHandler {

private startExperiment(router: Router): void {
router.post('/experiment', expressJoi(ValidationSchemas.STARTEXPERIMENT), (req: Request, res: Response) => {
if (isNewExperiment()) {
let experimentMode: string = getExperimentMode();
if (experimentMode === ExperimentStartUpMode.NEW) {
this.nniManager.startExperiment(req.body).then((eid: string) => {
res.send({
experiment_id: eid
Expand All @@ -168,13 +176,20 @@ class NNIRestHandler {
// Start experiment is a step of initialization, so any exception thrown is a fatal
this.handle_error(err, res);
});
} else {
} else if (experimentMode === ExperimentStartUpMode.RESUME){
this.nniManager.resumeExperiment().then(() => {
res.send();
}).catch((err: Error) => {
// Resume experiment is a step of initialization, so any exception thrown is a fatal
this.handle_error(err, res);
});
} else if (experimentMode === ExperimentStartUpMode.VIEW){
this.nniManager.viewExperiment().then(() => {
res.send();
}).catch((err: Error) => {
// View experiment is a step of initialization, so any exception thrown is a fatal
this.handle_error(err, res);
});
}
});
}
Expand All @@ -193,18 +208,26 @@ class NNIRestHandler {
router.put(
'/experiment/cluster-metadata', expressJoi(ValidationSchemas.SETCLUSTERMETADATA),
async (req: Request, res: Response) => {
// tslint:disable-next-line:no-any
const metadata: any = req.body;
const keys: string[] = Object.keys(metadata);
try {
for (const key of keys) {
await this.nniManager.setClusterMetadata(key, JSON.stringify(metadata[key]));
let experimentMode: string = getExperimentMode();
if(experimentMode !== ExperimentStartUpMode.VIEW) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

white space

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

removed.

// tslint:disable-next-line:no-any
const metadata: any = req.body;
const keys: string[] = Object.keys(metadata);
try {
for (const key of keys) {
await this.nniManager.setClusterMetadata(key, JSON.stringify(metadata[key]));
}
res.send();
} catch (err) {
// setClusterMetata is a step of initialization, so any exception thrown is a fatal
this.handle_error(NNIError.FromError(err), res, true);
}
res.send();
} catch (err) {
// setClusterMetata is a step of initialization, so any exception thrown is a fatal
this.handle_error(NNIError.FromError(err), res, true);
} else {
let message = `Could not set cluster-metadata in view mode!`;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does nnictl handle this error message?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah, nnictl handles this error message

this.log.warning(message);
res.send(message);
}

});
}

Expand Down Expand Up @@ -234,21 +257,36 @@ class NNIRestHandler {

private addTrialJob(router: Router): void {
router.post('/trial-jobs', async (req: Request, res: Response) => {
this.nniManager.addCustomizedTrialJob(JSON.stringify(req.body)).then(() => {
res.send();
}).catch((err: Error) => {
this.handle_error(err, res);
});
let experimentMode: string = getExperimentMode();
if(experimentMode !== ExperimentStartUpMode.VIEW) {
this.nniManager.addCustomizedTrialJob(JSON.stringify(req.body)).then(() => {
res.send();
}).catch((err: Error) => {
this.handle_error(err, res);
});
} else {
let message = `Could not add customized trial in view mode!`;
QuanluZhang marked this conversation as resolved.
Show resolved Hide resolved
this.log.warning(message);
res.send(message);
}
});
}

private cancelTrialJob(router: Router): void {
router.delete('/trial-jobs/:id', async (req: Request, res: Response) => {
this.nniManager.cancelTrialJobByUser(req.params.id).then(() => {
res.send();
}).catch((err: Error) => {
this.handle_error(err, res);
});
let experimentMode: string = getExperimentMode();
if(experimentMode !== ExperimentStartUpMode.VIEW) {
this.nniManager.cancelTrialJobByUser(req.params.id).then(() => {
res.send();
}).catch((err: Error) => {
this.handle_error(err, res);
});
} else {
let message = `Could not delete trial job in view mode!`;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this.log.warning(message);
res.send(message);
}

});
}

Expand Down
Loading