From 521f1917c09eb4653b104df2aa01c99388d42a15 Mon Sep 17 00:00:00 2001 From: liuzhe-lz <40699903+liuzhe-lz@users.noreply.github.com> Date: Thu, 3 Jun 2021 12:06:06 +0800 Subject: [PATCH] Fix a logging related bug (#3705) --- ts/nni_manager/common/pythonScript.ts | 2 +- ts/nni_manager/common/restServer.ts | 2 +- ts/nni_manager/core/ipcInterface.ts | 2 +- ts/nni_manager/core/nniDataStore.ts | 5 ++--- ts/nni_manager/core/nniExperimentsManager.ts | 2 +- ts/nni_manager/core/nniTensorboardManager.ts | 2 +- ts/nni_manager/core/nnimanager.ts | 12 ++++++------ ts/nni_manager/core/sqlDatabase.ts | 16 ++++++++-------- ts/nni_manager/rest_server/restHandler.ts | 4 ++-- .../common/clusterJobRestServer.ts | 4 ++-- ts/nni_manager/training_service/common/util.ts | 2 +- .../kubernetes/azureStorageClientUtils.ts | 14 +++++++------- .../kubernetes/kubernetesApiClient.ts | 4 ++-- .../kubernetes/kubernetesJobInfoCollector.ts | 2 +- .../kubernetes/kubernetesTrainingService.ts | 2 +- .../training_service/local/gpuScheduler.ts | 2 +- .../local/localTrainingService.ts | 4 ++-- .../training_service/pai/paiJobInfoCollector.ts | 2 +- .../training_service/pai/paiJobRestServer.ts | 2 +- .../training_service/pai/paiTrainingService.ts | 4 ++-- .../remote_machine/gpuScheduler.ts | 2 +- .../remoteMachineTrainingService.ts | 2 +- .../remote_machine/shellExecutor.ts | 2 +- .../reusable/channels/webCommandChannel.ts | 4 ++-- .../training_service/reusable/commandChannel.ts | 2 +- .../training_service/reusable/environment.ts | 4 ++-- .../environments/amlEnvironmentService.ts | 2 +- .../environments/localEnvironmentService.ts | 2 +- .../environments/openPaiEnvironmentService.ts | 6 +++--- .../environments/remoteEnvironmentService.ts | 2 +- .../training_service/reusable/gpuScheduler.ts | 2 +- .../shared_storages/azureblobStorageService.ts | 2 +- .../shared_storages/nfsStorageService.ts | 2 +- .../training_service/reusable/storageService.ts | 2 +- 34 files changed, 62 insertions(+), 63 deletions(-) diff --git a/ts/nni_manager/common/pythonScript.ts b/ts/nni_manager/common/pythonScript.ts index 10e6b8311a..d3a2774204 100644 --- a/ts/nni_manager/common/pythonScript.ts +++ b/ts/nni_manager/common/pythonScript.ts @@ -22,7 +22,7 @@ export async function runPythonScript(script: string, logger?: Logger): Promise< if (stderr) { if (logger === undefined) { - logger = getLogger(); + logger = getLogger('pythonScript'); } logger.warning('python script has stderr.'); logger.warning('script:', script); diff --git a/ts/nni_manager/common/restServer.ts b/ts/nni_manager/common/restServer.ts index 368aff977c..efa1d056eb 100644 --- a/ts/nni_manager/common/restServer.ts +++ b/ts/nni_manager/common/restServer.ts @@ -25,7 +25,7 @@ export abstract class RestServer { protected hostName: string = '0.0.0.0'; protected port?: number; protected app: express.Application = express(); - protected log: Logger = getLogger(); + protected log: Logger = getLogger('RestServer'); protected basePort?: number; constructor() { diff --git a/ts/nni_manager/core/ipcInterface.ts b/ts/nni_manager/core/ipcInterface.ts index c6fbf3d52b..41bc0c28b3 100644 --- a/ts/nni_manager/core/ipcInterface.ts +++ b/ts/nni_manager/core/ipcInterface.ts @@ -56,7 +56,7 @@ class IpcInterface { private incomingStream: Readable; private eventEmitter: EventEmitter; private readBuffer: Buffer; - private logger: Logger = getLogger(); + private logger: Logger = getLogger('IpcInterface'); /** * Construct a IPC proxy diff --git a/ts/nni_manager/core/nniDataStore.ts b/ts/nni_manager/core/nniDataStore.ts index 1d0ac3adef..e60a387a80 100644 --- a/ts/nni_manager/core/nniDataStore.ts +++ b/ts/nni_manager/core/nniDataStore.ts @@ -19,7 +19,7 @@ import { getDefaultDatabaseDir, mkDirP } from '../common/utils'; class NNIDataStore implements DataStore { private db: Database = component.get(Database); - private log: Logger = getLogger(); + private log: Logger = getLogger('NNIDataStore'); private initTask!: Deferred; public init(): Promise { @@ -71,7 +71,6 @@ class NNIDataStore implements DataStore { public storeTrialJobEvent( event: TrialJobEvent, trialJobId: string, hyperParameter?: string, jobDetail?: TrialJobDetail): Promise { - //this.log.debug(`storeTrialJobEvent: event: ${event}, data: ${hyperParameter}, jobDetail: ${JSON.stringify(jobDetail)}`); // Use the timestamp in jobDetail as TrialJobEvent timestamp for different events let timestamp: number | undefined; @@ -243,7 +242,7 @@ class NNIDataStore implements DataStore { for (const metric of metrics) { const existMetrics: MetricDataRecord[] | undefined = map.get(metric.trialJobId); if (existMetrics !== undefined) { - this.log.error(`Found multiple FINAL results for trial job ${trialJobId}, metrics: ${JSON.stringify(metrics)}`); + this.log.error(`Found multiple FINAL results for trial job ${trialJobId}, metrics:`, metrics); } else { map.set(metric.trialJobId, [metric]); } diff --git a/ts/nni_manager/core/nniExperimentsManager.ts b/ts/nni_manager/core/nniExperimentsManager.ts index e345817406..10712d2f2e 100644 --- a/ts/nni_manager/core/nniExperimentsManager.ts +++ b/ts/nni_manager/core/nniExperimentsManager.ts @@ -30,7 +30,7 @@ class NNIExperimentsManager implements ExperimentManager { constructor() { this.experimentsPath = getExperimentsInfoPath(); - this.log = getLogger(); + this.log = getLogger('NNIExperimentsManager'); this.profileUpdateTimer = {}; } diff --git a/ts/nni_manager/core/nniTensorboardManager.ts b/ts/nni_manager/core/nniTensorboardManager.ts index f3993b9dd7..3f8af1fd4e 100644 --- a/ts/nni_manager/core/nniTensorboardManager.ts +++ b/ts/nni_manager/core/nniTensorboardManager.ts @@ -37,7 +37,7 @@ class NNITensorboardManager implements TensorboardManager { private nniManager: Manager; constructor() { - this.log = getLogger(); + this.log = getLogger('NNITensorboardManager'); this.tensorboardTaskMap = new Map(); this.setTensorboardVersion(); this.nniManager = component.get(Manager); diff --git a/ts/nni_manager/core/nnimanager.ts b/ts/nni_manager/core/nnimanager.ts index 8dd69a46f3..2f94465c79 100644 --- a/ts/nni_manager/core/nnimanager.ts +++ b/ts/nni_manager/core/nnimanager.ts @@ -61,7 +61,7 @@ class NNIManager implements Manager { this.trialDataForTuner = ''; this.readonly = false; - this.log = getLogger(); + this.log = getLogger('NNIManager'); this.dataStore = component.get(DataStore); this.status = { status: 'INITIALIZED', @@ -659,7 +659,7 @@ class NNIManager implements Manager { } const form = this.waitingTrials.shift() as TrialJobApplicationForm; this.currSubmittedTrialNum++; - this.log.info(`submitTrialJob: form: ${JSON.stringify(form)}`); + this.log.info('submitTrialJob: form:', form); const trialJobDetail: TrialJobDetail = await this.trainingService.submitTrialJob(form); const Snapshot: TrialJobDetail = Object.assign({}, trialJobDetail); await this.storeExperimentProfile(); @@ -732,7 +732,7 @@ class NNIManager implements Manager { } private async onTrialJobMetrics(metric: TrialJobMetric): Promise { - this.log.debug(`NNIManager received trial job metrics: ${JSON.stringify(metric)}`); + this.log.debug('NNIManager received trial job metrics:', metric); if (this.trialJobs.has(metric.id)){ await this.dataStore.storeMetricData(metric.id, metric.data); if (this.dispatcher === undefined) { @@ -740,7 +740,7 @@ class NNIManager implements Manager { } this.dispatcher.sendCommand(REPORT_METRIC_DATA, metric.data); } else { - this.log.warning(`NNIManager received non-existent trial job metrics: ${metric}`); + this.log.warning('NNIManager received non-existent trial job metrics:', metric); } } @@ -804,7 +804,7 @@ class NNIManager implements Manager { index: tunerCommand.parameter_index } }; - this.log.info(`updateTrialJob: job id: ${tunerCommand.trial_job_id}, form: ${JSON.stringify(trialJobForm)}`); + this.log.info('updateTrialJob: job id:', tunerCommand.trial_job_id, 'form:', trialJobForm); await this.trainingService.updateTrialJob(tunerCommand.trial_job_id, trialJobForm); if (tunerCommand['parameters'] !== null) { // parameters field is set as empty string if no more hyper parameter can be generated by tuner. @@ -820,7 +820,7 @@ class NNIManager implements Manager { break; } case KILL_TRIAL_JOB: { - this.log.info(`cancelTrialJob: ${JSON.parse(content)}`); + this.log.info('cancelTrialJob:', content); await this.trainingService.cancelTrialJob(JSON.parse(content), true); break; } diff --git a/ts/nni_manager/core/sqlDatabase.ts b/ts/nni_manager/core/sqlDatabase.ts index 5ccb29c06d..4a15c28b87 100644 --- a/ts/nni_manager/core/sqlDatabase.ts +++ b/ts/nni_manager/core/sqlDatabase.ts @@ -80,7 +80,7 @@ function loadMetricData(row: any): MetricDataRecord { class SqlDB implements Database { private db!: sqlite3.Database; - private log: Logger = getLogger(); + private log: Logger = getLogger('SqlDB'); private initTask!: Deferred; public init(createNew: boolean, dbDir: string): Promise { @@ -130,7 +130,7 @@ class SqlDB implements Database { exp.nextSequenceId, exp.revision ]; - this.log.trace(`storeExperimentProfile: SQL: ${sql}, args: ${JSON.stringify(args)}`); + this.log.trace(`storeExperimentProfile: SQL: ${sql}, args:`, args); const deferred: Deferred = new Deferred(); this.db.run(sql, args, (err: Error | null) => { this.resolve(deferred, err); }); @@ -147,7 +147,7 @@ class SqlDB implements Database { sql = 'select * from ExperimentProfile where id=? and revision=?'; args = [experimentId, revision]; } - this.log.trace(`queryExperimentProfile: SQL: ${sql}, args: ${JSON.stringify(args)}`); + this.log.trace(`queryExperimentProfile: SQL: ${sql}, args:`, args); const deferred: Deferred = new Deferred(); this.db.all(sql, args, (err: Error | null, rows: any[]) => { this.resolve(deferred, err, rows, loadExperimentProfile); @@ -170,7 +170,7 @@ class SqlDB implements Database { const message: string | undefined = jobDetail === undefined ? undefined : jobDetail.message; const args: any[] = [timestamp, trialJobId, event, hyperParameter, logPath, sequenceId, message]; - this.log.trace(`storeTrialJobEvent: SQL: ${sql}, args: ${JSON.stringify(args)}`); + this.log.trace(`storeTrialJobEvent: SQL: ${sql}, args:`, args); const deferred: Deferred = new Deferred(); this.db.run(sql, args, (err: Error | null) => { this.resolve(deferred, err); }); @@ -193,7 +193,7 @@ class SqlDB implements Database { args = [trialJobId, event]; } - this.log.trace(`queryTrialJobEvent: SQL: ${sql}, args: ${JSON.stringify(args)}`); + this.log.trace(`queryTrialJobEvent: SQL: ${sql}, args:`, args); const deferred: Deferred = new Deferred(); this.db.all(sql, args, (err: Error | null, rows: any[]) => { this.resolve(deferred, err, rows, loadTrialJobEvent); @@ -207,7 +207,7 @@ class SqlDB implements Database { const json: MetricDataRecord = JSON.parse(data); const args: any[] = [Date.now(), json.trialJobId, json.parameterId, json.type, json.sequence, JSON.stringify(json.data)]; - this.log.trace(`storeMetricData: SQL: ${sql}, args: ${JSON.stringify(args)}`); + this.log.trace(`storeMetricData: SQL: ${sql}, args:`, args); const deferred: Deferred = new Deferred(); this.db.run(sql, args, (err: Error | null) => { this.resolve(deferred, err); }); @@ -230,7 +230,7 @@ class SqlDB implements Database { args = [trialJobId, metricType]; } - this.log.trace(`queryMetricData: SQL: ${sql}, args: ${JSON.stringify(args)}`); + this.log.trace(`queryMetricData: SQL: ${sql}, args:`, args); const deferred: Deferred = new Deferred(); this.db.all(sql, args, (err: Error | null, rows: any[]) => { this.resolve(deferred, err, rows, loadMetricData); @@ -259,7 +259,7 @@ class SqlDB implements Database { for (const row of (rows)) { data.push(rowLoader(row)); } - this.log.trace(`sql query result: ${JSON.stringify(data)}`); + this.log.trace(`sql query result:`, data); (>deferred).resolve(data); } } diff --git a/ts/nni_manager/rest_server/restHandler.ts b/ts/nni_manager/rest_server/restHandler.ts index 9d528dab06..21f4f1f015 100644 --- a/ts/nni_manager/rest_server/restHandler.ts +++ b/ts/nni_manager/rest_server/restHandler.ts @@ -32,14 +32,14 @@ class NNIRestHandler { this.experimentsManager = component.get(ExperimentManager); this.tensorboardManager = component.get(TensorboardManager); this.restServer = rs; - this.log = getLogger(); + this.log = getLogger('NNIRestHandler'); } public createRestHandler(): Router { const router: Router = Router(); router.use((req: Request, res: Response, next) => { - this.log.debug(`${req.method}: ${req.url}: body:\n${JSON.stringify(req.body, undefined, 4)}`); + this.log.debug(`${req.method}: ${req.url}: body:`, req.body); res.header('Access-Control-Allow-Headers', 'Origin, X-Requested-With, Content-Type, Accept'); res.header('Access-Control-Allow-Methods', 'PUT,POST,GET,DELETE,OPTIONS'); diff --git a/ts/nni_manager/training_service/common/clusterJobRestServer.ts b/ts/nni_manager/training_service/common/clusterJobRestServer.ts index 49298744a3..43501046a9 100644 --- a/ts/nni_manager/training_service/common/clusterJobRestServer.ts +++ b/ts/nni_manager/training_service/common/clusterJobRestServer.ts @@ -76,7 +76,7 @@ export abstract class ClusterJobRestServer extends RestServer { const router: Router = Router(); router.use((req: Request, res: Response, next: any) => { - this.log.info(`${req.method}: ${req.url}: body:\n${JSON.stringify(req.body, undefined, 4)}`); + this.log.info(`${req.method}: ${req.url}: body:`, req.body); res.setHeader('Content-Type', 'application/json'); next(); }); @@ -109,7 +109,7 @@ export abstract class ClusterJobRestServer extends RestServer { router.post(`/update-metrics/${this.expId}/:trialId`, (req: Request, res: Response) => { try { this.log.info(`Get update-metrics request, trial job id is ${req.params.trialId}`); - this.log.info(`update-metrics body is ${JSON.stringify(req.body)}`); + this.log.info('update-metrics body is', req.body); this.handleTrialMetrics(req.body.jobId, req.body.metrics); diff --git a/ts/nni_manager/training_service/common/util.ts b/ts/nni_manager/training_service/common/util.ts index c88c9cd957..122be7c387 100644 --- a/ts/nni_manager/training_service/common/util.ts +++ b/ts/nni_manager/training_service/common/util.ts @@ -111,7 +111,7 @@ export async function execCopydir(source: string, destination: string): Promise< await fs.promises.mkdir(destPath); } } else { - getLogger().debug(`Copying file from ${sourcePath} to ${destPath}`); + getLogger('execCopydir').debug(`Copying file from ${sourcePath} to ${destPath}`); await fs.promises.copyFile(sourcePath, destPath); } } diff --git a/ts/nni_manager/training_service/kubernetes/azureStorageClientUtils.ts b/ts/nni_manager/training_service/kubernetes/azureStorageClientUtils.ts index fb393d186c..c589f64e92 100644 --- a/ts/nni_manager/training_service/kubernetes/azureStorageClientUtils.ts +++ b/ts/nni_manager/training_service/kubernetes/azureStorageClientUtils.ts @@ -22,7 +22,7 @@ export namespace AzureStorageClientUtility { const deferred: Deferred = new Deferred(); fileServerClient.createShareIfNotExists(azureShare, (error: any, _result: any, _response: any) => { if (error) { - getLogger() + getLogger('AzureStorageClientUtility') .error(`Create share failed:, ${error}`); deferred.resolve(false); } else { @@ -43,7 +43,7 @@ export namespace AzureStorageClientUtility { const deferred: Deferred = new Deferred(); fileServerClient.createDirectoryIfNotExists(azureShare, azureFoler, (error: any, _result: any, _response: any) => { if (error) { - getLogger() + getLogger('AzureStorageClientUtility') .error(`Create directory failed:, ${error}`); deferred.resolve(false); } else { @@ -91,7 +91,7 @@ export namespace AzureStorageClientUtility { await fileServerClient.createFileFromLocalFile(azureShare, azureDirectory, azureFileName, localFilePath, (error: any, _result: any, _response: any) => { if (error) { - getLogger() + getLogger('AzureStorageClientUtility') .error(`Upload file failed:, ${error}`); deferred.resolve(false); } else { @@ -116,7 +116,7 @@ export namespace AzureStorageClientUtility { await fileServerClient.getFileToStream(azureShare, azureDirectory, azureFileName, fs.createWriteStream(localFilePath), (error: any, _result: any, _response: any) => { if (error) { - getLogger() + getLogger('AzureStorageClientUtility') .error(`Download file failed:, ${error}`); deferred.resolve(false); } else { @@ -185,19 +185,19 @@ export namespace AzureStorageClientUtility { fileServerClient.listFilesAndDirectoriesSegmented(azureShare, azureDirectory, 'null', async (_error: any, result: any, _response: any) => { if (('entries' in result) === false) { - getLogger() + getLogger('AzureStorageClientUtility') .error(`list files failed, can't get entries in result`); throw new Error(`list files failed, can't get entries in result`); } if (('files' in result.entries) === false) { - getLogger() + getLogger('AzureStorageClientUtility') .error(`list files failed, can't get files in result['entries']`); throw new Error(`list files failed, can't get files in result['entries']`); } if (('directories' in result.directories) === false) { - getLogger() + getLogger('AzureStorageClientUtility') .error(`list files failed, can't get directories in result['entries']`); throw new Error(`list files failed, can't get directories in result['entries']`); } diff --git a/ts/nni_manager/training_service/kubernetes/kubernetesApiClient.ts b/ts/nni_manager/training_service/kubernetes/kubernetesApiClient.ts index ea3dc459f8..84784a05ce 100644 --- a/ts/nni_manager/training_service/kubernetes/kubernetesApiClient.ts +++ b/ts/nni_manager/training_service/kubernetes/kubernetesApiClient.ts @@ -12,7 +12,7 @@ import {getLogger, Logger} from '../../common/log'; */ class GeneralK8sClient { protected readonly client: any; - protected readonly log: Logger = getLogger(); + protected readonly log: Logger = getLogger('GeneralK8sClient'); protected namespace: string = 'default'; constructor() { @@ -135,7 +135,7 @@ class GeneralK8sClient { */ abstract class KubernetesCRDClient { protected readonly client: any; - protected readonly log: Logger = getLogger(); + protected readonly log: Logger = getLogger('KubernetesCRDClient'); protected crdSchema: any; constructor() { diff --git a/ts/nni_manager/training_service/kubernetes/kubernetesJobInfoCollector.ts b/ts/nni_manager/training_service/kubernetes/kubernetesJobInfoCollector.ts index 2865ff19fd..231f088efe 100644 --- a/ts/nni_manager/training_service/kubernetes/kubernetesJobInfoCollector.ts +++ b/ts/nni_manager/training_service/kubernetes/kubernetesJobInfoCollector.ts @@ -15,7 +15,7 @@ import { KubernetesTrialJobDetail } from './kubernetesData'; */ export class KubernetesJobInfoCollector { protected readonly trialJobsMap: Map; - protected readonly log: Logger = getLogger(); + protected readonly log: Logger = getLogger('KubernetesJobInfoCollector'); protected readonly statusesNeedToCheck: TrialJobStatus[]; constructor(jobMap: Map) { diff --git a/ts/nni_manager/training_service/kubernetes/kubernetesTrainingService.ts b/ts/nni_manager/training_service/kubernetes/kubernetesTrainingService.ts index 147895ea28..b747ad84fc 100644 --- a/ts/nni_manager/training_service/kubernetes/kubernetesTrainingService.ts +++ b/ts/nni_manager/training_service/kubernetes/kubernetesTrainingService.ts @@ -54,7 +54,7 @@ abstract class KubernetesTrainingService { protected expContainerCodeFolder: string; constructor() { - this.log = getLogger(); + this.log = getLogger('KubernetesTrainingService'); this.metricsEmitter = new EventEmitter(); this.trialJobsMap = new Map(); this.trialLocalTempFolder = path.join(getExperimentRootDir(), 'trials-nfs-tmp'); diff --git a/ts/nni_manager/training_service/local/gpuScheduler.ts b/ts/nni_manager/training_service/local/gpuScheduler.ts index c437a2708d..0283988b0f 100644 --- a/ts/nni_manager/training_service/local/gpuScheduler.ts +++ b/ts/nni_manager/training_service/local/gpuScheduler.ts @@ -24,7 +24,7 @@ class GPUScheduler { constructor() { this.stopping = false; - this.log = getLogger(); + this.log = getLogger('GPUScheduler'); this.gpuMetricCollectorScriptFolder = `${os.tmpdir()}/${os.userInfo().username}/nni/script`; } diff --git a/ts/nni_manager/training_service/local/localTrainingService.ts b/ts/nni_manager/training_service/local/localTrainingService.ts index 459bf389c3..1bc33a8340 100644 --- a/ts/nni_manager/training_service/local/localTrainingService.ts +++ b/ts/nni_manager/training_service/local/localTrainingService.ts @@ -98,7 +98,7 @@ class LocalTrainingService implements TrainingService { this.jobMap = new Map(); this.jobQueue = []; this.stopping = false; - this.log = getLogger(); + this.log = getLogger('LocalTrainingService'); this.experimentId = getExperimentId(); this.jobStreamMap = new Map(); this.log.info('Construct local machine training service.'); @@ -204,7 +204,7 @@ class LocalTrainingService implements TrainingService { this.jobQueue.push(trialJobId); this.jobMap.set(trialJobId, trialJobDetail); - this.log.debug(`submitTrialJob: return: ${JSON.stringify(trialJobDetail)} `); + this.log.debug('submitTrialJob: return:', trialJobDetail); return Promise.resolve(trialJobDetail); } diff --git a/ts/nni_manager/training_service/pai/paiJobInfoCollector.ts b/ts/nni_manager/training_service/pai/paiJobInfoCollector.ts index cf3974618e..24eaf7b3b8 100644 --- a/ts/nni_manager/training_service/pai/paiJobInfoCollector.ts +++ b/ts/nni_manager/training_service/pai/paiJobInfoCollector.ts @@ -18,7 +18,7 @@ interface FlattenOpenpaiConfig extends ExperimentConfig, OpenpaiConfig { } */ export class PAIJobInfoCollector { private readonly trialJobsMap: Map; - private readonly log: Logger = getLogger(); + private readonly log: Logger = getLogger('PAIJobInfoCollector'); private readonly statusesNeedToCheck: TrialJobStatus[]; private readonly finalStatuses: TrialJobStatus[]; diff --git a/ts/nni_manager/training_service/pai/paiJobRestServer.ts b/ts/nni_manager/training_service/pai/paiJobRestServer.ts index 00e960cdce..1ba7930736 100644 --- a/ts/nni_manager/training_service/pai/paiJobRestServer.ts +++ b/ts/nni_manager/training_service/pai/paiJobRestServer.ts @@ -46,7 +46,7 @@ export class PAIJobRestServer extends ClusterJobRestServer { router.post(`/parameter-file-meta`, (req: Request, res: Response) => { try { - this.log.info(`POST /parameter-file-meta, body is ${JSON.stringify(req.body)}`); + this.log.info('POST /parameter-file-meta, body is', req.body); this.parameterFileMetaList.push(req.body); res.send(); } catch (err) { diff --git a/ts/nni_manager/training_service/pai/paiTrainingService.ts b/ts/nni_manager/training_service/pai/paiTrainingService.ts index 11f24cea5b..36cf78835a 100644 --- a/ts/nni_manager/training_service/pai/paiTrainingService.ts +++ b/ts/nni_manager/training_service/pai/paiTrainingService.ts @@ -63,7 +63,7 @@ class PAITrainingService implements TrainingService { private config: FlattenOpenpaiConfig; constructor(config: ExperimentConfig) { - this.log = getLogger(); + this.log = getLogger('PAITrainingService'); this.metricsEmitter = new EventEmitter(); this.trialJobsMap = new Map(); this.jobQueue = []; @@ -308,7 +308,7 @@ class PAITrainingService implements TrainingService { } public async submitTrialJob(form: TrialJobApplicationForm): Promise { - this.log.info(`submitTrialJob: form: ${JSON.stringify(form)}`); + this.log.info('submitTrialJob: form:', form); const trialJobId: string = uniqueString(5); //TODO: use HDFS working folder instead diff --git a/ts/nni_manager/training_service/remote_machine/gpuScheduler.ts b/ts/nni_manager/training_service/remote_machine/gpuScheduler.ts index f520e96c45..a78e841cb2 100644 --- a/ts/nni_manager/training_service/remote_machine/gpuScheduler.ts +++ b/ts/nni_manager/training_service/remote_machine/gpuScheduler.ts @@ -18,7 +18,7 @@ type SCHEDULE_POLICY_NAME = 'random' | 'round-robin'; export class GPUScheduler { private readonly machineExecutorMap: Map; - private readonly log: Logger = getLogger(); + private readonly log: Logger = getLogger('GPUScheduler'); private readonly policyName: SCHEDULE_POLICY_NAME = 'round-robin'; private roundRobinIndex: number = 0; private configuredRMs: RemoteMachineMeta[] = []; diff --git a/ts/nni_manager/training_service/remote_machine/remoteMachineTrainingService.ts b/ts/nni_manager/training_service/remote_machine/remoteMachineTrainingService.ts index f2f789d194..da6c5551bb 100644 --- a/ts/nni_manager/training_service/remote_machine/remoteMachineTrainingService.ts +++ b/ts/nni_manager/training_service/remote_machine/remoteMachineTrainingService.ts @@ -67,7 +67,7 @@ class RemoteMachineTrainingService implements TrainingService { this.sshConnectionPromises = []; this.expRootDir = getExperimentRootDir(); this.timer = component.get(ObservableTimer); - this.log = getLogger(); + this.log = getLogger('RemoteMachineTrainingService'); this.log.info('Construct remote machine training service.'); this.config = flattenConfig(config, 'remote'); diff --git a/ts/nni_manager/training_service/remote_machine/shellExecutor.ts b/ts/nni_manager/training_service/remote_machine/shellExecutor.ts index 0baa5bae6f..00379f6ebe 100644 --- a/ts/nni_manager/training_service/remote_machine/shellExecutor.ts +++ b/ts/nni_manager/training_service/remote_machine/shellExecutor.ts @@ -36,7 +36,7 @@ class ShellExecutor { public isWindows: boolean = false; constructor() { - this.log = getLogger(); + this.log = getLogger('ShellExecutor'); this.sshClient = new Client(); } diff --git a/ts/nni_manager/training_service/reusable/channels/webCommandChannel.ts b/ts/nni_manager/training_service/reusable/channels/webCommandChannel.ts index f292b5451a..ed55885fe6 100644 --- a/ts/nni_manager/training_service/reusable/channels/webCommandChannel.ts +++ b/ts/nni_manager/training_service/reusable/channels/webCommandChannel.ts @@ -61,7 +61,7 @@ export class WebCommandChannel extends CommandChannel { this.webSocketServer.on('connection', (client: WebSocket) => { this.log.debug(`WebCommandChannel: received connection`); client.onerror = (event): void => { - this.log.error(`error on client ${JSON.stringify(event)}`); + this.log.error('error on client', event); } this.clients.set(client, undefined); @@ -109,7 +109,7 @@ export class WebCommandChannel extends CommandChannel { // undefined means it's expecting initializing message. const commands = this.parseCommands(rawCommands); let isValid = false; - this.log.debug(`WebCommandChannel: received initialize message: ${JSON.stringify(rawCommands)}`); + this.log.debug('WebCommandChannel: received initialize message:', rawCommands); if (commands.length > 0) { const commandType = commands[0][0]; diff --git a/ts/nni_manager/training_service/reusable/commandChannel.ts b/ts/nni_manager/training_service/reusable/commandChannel.ts index 1cf836f093..a16d0e27aa 100644 --- a/ts/nni_manager/training_service/reusable/commandChannel.ts +++ b/ts/nni_manager/training_service/reusable/commandChannel.ts @@ -50,7 +50,7 @@ export abstract class CommandChannel { private readonly commandPattern: RegExp = /(?[\w]{2})(?[\d]{14})(?.*)\n?/gm; public constructor(commandEmitter: EventEmitter) { - this.log = getLogger(); + this.log = getLogger('CommandChannel'); this.commandEmitter = commandEmitter; } diff --git a/ts/nni_manager/training_service/reusable/environment.ts b/ts/nni_manager/training_service/reusable/environment.ts index 7c399c2a06..c89bc5edd6 100644 --- a/ts/nni_manager/training_service/reusable/environment.ts +++ b/ts/nni_manager/training_service/reusable/environment.ts @@ -82,7 +82,7 @@ export class EnvironmentInformation { public useSharedStorage?: boolean; constructor(id: string, name: string, envId?: string) { - this.log = getLogger(); + this.log = getLogger('EnvironmentInformation'); this.id = id; this.name = name; this.envId = envId ? envId : name; @@ -116,7 +116,7 @@ export class EnvironmentInformation { const gpuSummary = this.gpuSummaries.get(this.defaultNodeId); if (gpuSummary === undefined) { if (false === this.isNoGpuWarned) { - this.log.warning(`EnvironmentInformation: ${this.envId} no default gpu found. current gpu info ${JSON.stringify(this.gpuSummaries)}`); + this.log.warning(`EnvironmentInformation: ${this.envId} no default gpu found. current gpu info`, this.gpuSummaries); this.isNoGpuWarned = true; } } else { diff --git a/ts/nni_manager/training_service/reusable/environments/amlEnvironmentService.ts b/ts/nni_manager/training_service/reusable/environments/amlEnvironmentService.ts index 9672084cf7..48a65db1f8 100644 --- a/ts/nni_manager/training_service/reusable/environments/amlEnvironmentService.ts +++ b/ts/nni_manager/training_service/reusable/environments/amlEnvironmentService.ts @@ -24,7 +24,7 @@ interface FlattenAmlConfig extends ExperimentConfig, AmlConfig { } @component.Singleton export class AMLEnvironmentService extends EnvironmentService { - private readonly log: Logger = getLogger(); + private readonly log: Logger = getLogger('AMLEnvironmentService'); private experimentId: string; private experimentRootDir: string; private config: FlattenAmlConfig; diff --git a/ts/nni_manager/training_service/reusable/environments/localEnvironmentService.ts b/ts/nni_manager/training_service/reusable/environments/localEnvironmentService.ts index 67ffd21d71..b462840168 100644 --- a/ts/nni_manager/training_service/reusable/environments/localEnvironmentService.ts +++ b/ts/nni_manager/training_service/reusable/environments/localEnvironmentService.ts @@ -17,7 +17,7 @@ import { SharedStorageService } from '../sharedStorage' @component.Singleton export class LocalEnvironmentService extends EnvironmentService { - private readonly log: Logger = getLogger(); + private readonly log: Logger = getLogger('LocalEnvironmentService'); private experimentRootDir: string; private experimentId: string; diff --git a/ts/nni_manager/training_service/reusable/environments/openPaiEnvironmentService.ts b/ts/nni_manager/training_service/reusable/environments/openPaiEnvironmentService.ts index 580d27fad1..0ebe482aa2 100644 --- a/ts/nni_manager/training_service/reusable/environments/openPaiEnvironmentService.ts +++ b/ts/nni_manager/training_service/reusable/environments/openPaiEnvironmentService.ts @@ -23,7 +23,7 @@ interface FlattenOpenpaiConfig extends ExperimentConfig, OpenpaiConfig { } @component.Singleton export class OpenPaiEnvironmentService extends EnvironmentService { - private readonly log: Logger = getLogger(); + private readonly log: Logger = getLogger('OpenPaiEnvironmentService'); private paiClusterConfig: PAIClusterConfig | undefined; private paiTrialConfig: NNIPAITrialConfig | undefined; private paiToken: string; @@ -77,7 +77,7 @@ export class OpenPaiEnvironmentService extends EnvironmentService { // Status code 200 for success if ((error !== undefined && error !== null) || response.statusCode >= 400) { const errorMessage: string = (error !== undefined && error !== null) ? error.message : - `OpenPAI: get environment list from PAI Cluster failed!, http code:${response.statusCode}, http body: ${JSON.stringify(body)}`; + `OpenPAI: get environment list from PAI Cluster failed!, http code:${response.statusCode}, http body:' ${JSON.stringify(body)}`; this.log.error(`${errorMessage}`); deferred.reject(errorMessage); } else { @@ -113,7 +113,7 @@ export class OpenPaiEnvironmentService extends EnvironmentService { this.log.debug(`OpenPAI: job ${environment.envId} change status ${oldEnvironmentStatus} to ${environment.status} due to job is ${jobResponse.state}.`) } } else { - this.log.error(`OpenPAI: job ${environment.envId} has no state returned. body:${JSON.stringify(jobResponse)}`); + this.log.error(`OpenPAI: job ${environment.envId} has no state returned. body:`, jobResponse); // some error happens, and mark this environment environment.status = 'FAILED'; } diff --git a/ts/nni_manager/training_service/reusable/environments/remoteEnvironmentService.ts b/ts/nni_manager/training_service/reusable/environments/remoteEnvironmentService.ts index cd31960601..3f5fed8e5c 100644 --- a/ts/nni_manager/training_service/reusable/environments/remoteEnvironmentService.ts +++ b/ts/nni_manager/training_service/reusable/environments/remoteEnvironmentService.ts @@ -39,7 +39,7 @@ export class RemoteEnvironmentService extends EnvironmentService { this.machineExecutorManagerMap = new Map(); this.remoteMachineMetaOccupiedMap = new Map(); this.experimentRootDir = experimentRootDir; - this.log = getLogger(); + this.log = getLogger('RemoteEnvironmentService'); this.config = flattenConfig(config, 'remote'); // codeDir is not a valid directory, throw Error diff --git a/ts/nni_manager/training_service/reusable/gpuScheduler.ts b/ts/nni_manager/training_service/reusable/gpuScheduler.ts index 86f590746d..2e94c29a58 100644 --- a/ts/nni_manager/training_service/reusable/gpuScheduler.ts +++ b/ts/nni_manager/training_service/reusable/gpuScheduler.ts @@ -29,7 +29,7 @@ export type GpuScheduleResult = { export class GpuScheduler { // private readonly machineExecutorMap: Set; - private readonly log: Logger = getLogger(); + private readonly log: Logger = getLogger('GpuScheduler'); private readonly policyName: SCHEDULE_POLICY_NAME = 'recently-idle'; private defaultSetting: GpuSchedulerSetting; private roundRobinIndex: number = 0; diff --git a/ts/nni_manager/training_service/reusable/shared_storages/azureblobStorageService.ts b/ts/nni_manager/training_service/reusable/shared_storages/azureblobStorageService.ts index 46165a307b..7a0185f861 100644 --- a/ts/nni_manager/training_service/reusable/shared_storages/azureblobStorageService.ts +++ b/ts/nni_manager/training_service/reusable/shared_storages/azureblobStorageService.ts @@ -66,7 +66,7 @@ export class AzureBlobSharedStorageService extends SharedStorageService { constructor() { super(); - this.log = getLogger(); + this.log = getLogger('AzureBlobSharedStorageService'); this.internalStorageService = new MountedStorageService(); this.experimentId = getExperimentId(); } diff --git a/ts/nni_manager/training_service/reusable/shared_storages/nfsStorageService.ts b/ts/nni_manager/training_service/reusable/shared_storages/nfsStorageService.ts index 212ea837d9..db7687196b 100644 --- a/ts/nni_manager/training_service/reusable/shared_storages/nfsStorageService.ts +++ b/ts/nni_manager/training_service/reusable/shared_storages/nfsStorageService.ts @@ -50,7 +50,7 @@ export class NFSSharedStorageService extends SharedStorageService { constructor() { super(); - this.log = getLogger(); + this.log = getLogger('NFSSharedStorageService'); this.internalStorageService = new MountedStorageService(); this.experimentId = getExperimentId(); } diff --git a/ts/nni_manager/training_service/reusable/storageService.ts b/ts/nni_manager/training_service/reusable/storageService.ts index ec54a6792a..c532d2ddad 100644 --- a/ts/nni_manager/training_service/reusable/storageService.ts +++ b/ts/nni_manager/training_service/reusable/storageService.ts @@ -30,7 +30,7 @@ export abstract class StorageService { protected abstract internalBasename(...paths: string[]): string; constructor() { - this.logger = getLogger(); + this.logger = getLogger('StorageService'); } public initialize(localRoot: string, remoteRoot: string): void {