-
Notifications
You must be signed in to change notification settings - Fork 1.8k
support display trial log on local mode #2718
Changes from 12 commits
482ef51
15f7706
dff2c2b
7315dac
520d9ad
a73db71
7b45ecf
6cd5e8f
4309a46
0d7974b
7545a83
6c7142d
183da4c
f702e97
607ed21
c71e296
3aa2183
1514a1b
9e0dead
3453bf6
2e98bd6
cd0333c
2f413da
92ef5c6
c0ed056
18ce44f
73989e0
dfabaad
40c0fd9
f7ec902
a01a1e8
5e0a5bf
5fbd017
d21d524
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,7 +16,7 @@ import { | |
NNIManagerStatus, ProfileUpdateType, TrialJobStatistics | ||
} from '../common/manager'; | ||
import { | ||
TrainingService, TrialJobApplicationForm, TrialJobDetail, TrialJobMetric, TrialJobStatus | ||
TrainingService, TrialJobApplicationForm, TrialJobDetail, TrialJobMetric, TrialJobStatus, LogType | ||
} from '../common/trainingService'; | ||
import { delay, getCheckpointDir, getExperimentRootDir, getLogDir, getMsgDispatcherCommand, mkDirP, getTunerProc, getLogLevel, isAlive, killPid } from '../common/utils'; | ||
import { | ||
|
@@ -325,6 +325,10 @@ class NNIManager implements Manager { | |
// FIXME: unit test | ||
} | ||
|
||
public async getTrialLog(trialJobId: string, logType: LogType): Promise<string> { | ||
return this.trainingService.getTrialLog(trialJobId, logType); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it might be better to catch There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. could you explain how did you fix this one? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In the latest commit, I revert this to the original version (i.e., not handle the error here) since the front end will check the training service now. The error here could also be an file operation error, so I let the restHander to catch the error throwed here. |
||
} | ||
|
||
public getExperimentProfile(): Promise<ExperimentProfile> { | ||
// TO DO: using Promise.resolve() | ||
const deferred: Deferred<ExperimentProfile> = new Deferred<ExperimentProfile>(); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -57,6 +57,7 @@ class NNIRestHandler { | |
this.getMetricData(router); | ||
this.getMetricDataByRange(router); | ||
this.getLatestMetricData(router); | ||
this.getTrialLog(router); | ||
this.exportData(router); | ||
|
||
// Express-joi-validator configuration | ||
|
@@ -268,6 +269,19 @@ class NNIRestHandler { | |
}); | ||
} | ||
|
||
private getTrialLog(router: Router): void { | ||
router.get('/trial-log/:id/:type', async(req: Request, res: Response) => { | ||
this.nniManager.getTrialLog(req.params.id, req.params.type).then((log: string) => { | ||
if (log === '') { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't suggest sending unstructured infomation through REST API. The idea behind is that REST API is designed for sending "raw data" which can be easily understood by program, rather than "UI string". There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think here |
||
log = 'No logs available.' | ||
} | ||
res.send(log); | ||
}).catch((err: Error) => { | ||
this.handleError(err, res); | ||
}); | ||
}); | ||
} | ||
|
||
private exportData(router: Router): void { | ||
router.get('/export-data', (req: Request, res: Response) => { | ||
this.nniManager.exportData().then((exportedData: string) => { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,7 +14,7 @@ import { getExperimentId } from '../../common/experimentStartupInfo'; | |
import { getLogger, Logger } from '../../common/log'; | ||
import { | ||
HyperParameters, TrainingService, TrialJobApplicationForm, | ||
TrialJobDetail, TrialJobMetric, TrialJobStatus | ||
TrialJobDetail, TrialJobMetric, TrialJobStatus, LogType | ||
} from '../../common/trainingService'; | ||
import { | ||
delay, generateParamFileName, getExperimentRootDir, getJobCancelStatus, getNewLine, isAlive, uniqueString | ||
|
@@ -184,6 +184,18 @@ class LocalTrainingService implements TrainingService { | |
return trialJob; | ||
} | ||
|
||
public async getTrialLog(trialJobId: string, logType: LogType): Promise<string> { | ||
QuanluZhang marked this conversation as resolved.
Show resolved
Hide resolved
|
||
let logPath: string; | ||
if (logType === 'TRIAL_LOG') { | ||
logPath = path.join(this.rootDir, 'trials', trialJobId, 'trial.log'); | ||
} else if (logType === 'TRIAL_STDERR') { | ||
logPath = path.join(this.rootDir, 'trials', trialJobId, 'stderr'); | ||
} else { | ||
throw new Error('unexpected log type'); | ||
} | ||
return fs.promises.readFile(logPath, 'utf8'); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. does this readFile() function need to catch exception? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This exception should be catched by |
||
} | ||
|
||
public addTrialJobMetricListener(listener: (metric: TrialJobMetric) => void): void { | ||
this.eventEmitter.on('metric', listener); | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,13 +9,13 @@ import * as fs from 'fs'; | |
import * as path from 'path'; | ||
import { Deferred } from 'ts-deferred'; | ||
import * as component from '../../common/component'; | ||
import { NNIError, NNIErrorNames } from '../../common/errors'; | ||
import { NNIError, NNIErrorNames, MethodNotImplementedError } from '../../common/errors'; | ||
import { getExperimentId } from '../../common/experimentStartupInfo'; | ||
import { getLogger, Logger } from '../../common/log'; | ||
import { ObservableTimer } from '../../common/observableTimer'; | ||
import { | ||
HyperParameters, NNIManagerIpConfig, TrainingService, TrialJobApplicationForm, | ||
TrialJobDetail, TrialJobMetric | ||
TrialJobDetail, TrialJobMetric, LogType | ||
} from '../../common/trainingService'; | ||
import { | ||
delay, generateParamFileName, getExperimentRootDir, getIPV4Address, getJobCancelStatus, | ||
|
@@ -173,6 +173,15 @@ class RemoteMachineTrainingService implements TrainingService { | |
} | ||
} | ||
|
||
/** | ||
* Get trial job log | ||
* @param _trialJobId ID of trial job | ||
* @param _logType 'TRIAL_LOG' | 'TRIAL_STDERR' | ||
*/ | ||
public async getTrialLog(_trialJobId: string, _logType: LogType): Promise<string> { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The doc string should be written in base class. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Agreed. I write doc string in this class because I found other functions like |
||
throw new MethodNotImplementedError(); | ||
} | ||
|
||
/** | ||
* Add job metrics listener | ||
* @param listener callback listener | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does TRIAL_LOG means stdout log?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, corresponding to
trial.log
.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we discussed and decided that it should be ERROR_MESSAGE
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
change to
TRIAL_LOG
andTRIAL_ERROR