Skip to content
This repository has been archived by the owner on Sep 18, 2024. It is now read-only.

Commit

Permalink
Fix a logging related bug (#3705)
Browse files Browse the repository at this point in the history
  • Loading branch information
liuzhe-lz authored Jun 3, 2021
1 parent b7c91e7 commit 521f191
Show file tree
Hide file tree
Showing 34 changed files with 62 additions and 63 deletions.
2 changes: 1 addition & 1 deletion ts/nni_manager/common/pythonScript.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ export async function runPythonScript(script: string, logger?: Logger): Promise<

if (stderr) {
if (logger === undefined) {
logger = getLogger();
logger = getLogger('pythonScript');
}
logger.warning('python script has stderr.');
logger.warning('script:', script);
Expand Down
2 changes: 1 addition & 1 deletion ts/nni_manager/common/restServer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ export abstract class RestServer {
protected hostName: string = '0.0.0.0';
protected port?: number;
protected app: express.Application = express();
protected log: Logger = getLogger();
protected log: Logger = getLogger('RestServer');
protected basePort?: number;

constructor() {
Expand Down
2 changes: 1 addition & 1 deletion ts/nni_manager/core/ipcInterface.ts
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ class IpcInterface {
private incomingStream: Readable;
private eventEmitter: EventEmitter;
private readBuffer: Buffer;
private logger: Logger = getLogger();
private logger: Logger = getLogger('IpcInterface');

/**
* Construct a IPC proxy
Expand Down
5 changes: 2 additions & 3 deletions ts/nni_manager/core/nniDataStore.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ import { getDefaultDatabaseDir, mkDirP } from '../common/utils';

class NNIDataStore implements DataStore {
private db: Database = component.get(Database);
private log: Logger = getLogger();
private log: Logger = getLogger('NNIDataStore');
private initTask!: Deferred<void>;

public init(): Promise<void> {
Expand Down Expand Up @@ -71,7 +71,6 @@ class NNIDataStore implements DataStore {

public storeTrialJobEvent(
event: TrialJobEvent, trialJobId: string, hyperParameter?: string, jobDetail?: TrialJobDetail): Promise<void> {
//this.log.debug(`storeTrialJobEvent: event: ${event}, data: ${hyperParameter}, jobDetail: ${JSON.stringify(jobDetail)}`);

// Use the timestamp in jobDetail as TrialJobEvent timestamp for different events
let timestamp: number | undefined;
Expand Down Expand Up @@ -243,7 +242,7 @@ class NNIDataStore implements DataStore {
for (const metric of metrics) {
const existMetrics: MetricDataRecord[] | undefined = map.get(metric.trialJobId);
if (existMetrics !== undefined) {
this.log.error(`Found multiple FINAL results for trial job ${trialJobId}, metrics: ${JSON.stringify(metrics)}`);
this.log.error(`Found multiple FINAL results for trial job ${trialJobId}, metrics:`, metrics);
} else {
map.set(metric.trialJobId, [metric]);
}
Expand Down
2 changes: 1 addition & 1 deletion ts/nni_manager/core/nniExperimentsManager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ class NNIExperimentsManager implements ExperimentManager {

constructor() {
this.experimentsPath = getExperimentsInfoPath();
this.log = getLogger();
this.log = getLogger('NNIExperimentsManager');
this.profileUpdateTimer = {};
}

Expand Down
2 changes: 1 addition & 1 deletion ts/nni_manager/core/nniTensorboardManager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ class NNITensorboardManager implements TensorboardManager {
private nniManager: Manager;

constructor() {
this.log = getLogger();
this.log = getLogger('NNITensorboardManager');
this.tensorboardTaskMap = new Map<string, TensorboardTaskDetail>();
this.setTensorboardVersion();
this.nniManager = component.get(Manager);
Expand Down
12 changes: 6 additions & 6 deletions ts/nni_manager/core/nnimanager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ class NNIManager implements Manager {
this.trialDataForTuner = '';
this.readonly = false;

this.log = getLogger();
this.log = getLogger('NNIManager');
this.dataStore = component.get(DataStore);
this.status = {
status: 'INITIALIZED',
Expand Down Expand Up @@ -659,7 +659,7 @@ class NNIManager implements Manager {
}
const form = this.waitingTrials.shift() as TrialJobApplicationForm;
this.currSubmittedTrialNum++;
this.log.info(`submitTrialJob: form: ${JSON.stringify(form)}`);
this.log.info('submitTrialJob: form:', form);
const trialJobDetail: TrialJobDetail = await this.trainingService.submitTrialJob(form);
const Snapshot: TrialJobDetail = Object.assign({}, trialJobDetail);
await this.storeExperimentProfile();
Expand Down Expand Up @@ -732,15 +732,15 @@ class NNIManager implements Manager {
}

private async onTrialJobMetrics(metric: TrialJobMetric): Promise<void> {
this.log.debug(`NNIManager received trial job metrics: ${JSON.stringify(metric)}`);
this.log.debug('NNIManager received trial job metrics:', metric);
if (this.trialJobs.has(metric.id)){
await this.dataStore.storeMetricData(metric.id, metric.data);
if (this.dispatcher === undefined) {
throw new Error('Error: tuner has not been setup');
}
this.dispatcher.sendCommand(REPORT_METRIC_DATA, metric.data);
} else {
this.log.warning(`NNIManager received non-existent trial job metrics: ${metric}`);
this.log.warning('NNIManager received non-existent trial job metrics:', metric);
}
}

Expand Down Expand Up @@ -804,7 +804,7 @@ class NNIManager implements Manager {
index: tunerCommand.parameter_index
}
};
this.log.info(`updateTrialJob: job id: ${tunerCommand.trial_job_id}, form: ${JSON.stringify(trialJobForm)}`);
this.log.info('updateTrialJob: job id:', tunerCommand.trial_job_id, 'form:', trialJobForm);
await this.trainingService.updateTrialJob(tunerCommand.trial_job_id, trialJobForm);
if (tunerCommand['parameters'] !== null) {
// parameters field is set as empty string if no more hyper parameter can be generated by tuner.
Expand All @@ -820,7 +820,7 @@ class NNIManager implements Manager {
break;
}
case KILL_TRIAL_JOB: {
this.log.info(`cancelTrialJob: ${JSON.parse(content)}`);
this.log.info('cancelTrialJob:', content);
await this.trainingService.cancelTrialJob(JSON.parse(content), true);
break;
}
Expand Down
16 changes: 8 additions & 8 deletions ts/nni_manager/core/sqlDatabase.ts
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ function loadMetricData(row: any): MetricDataRecord {

class SqlDB implements Database {
private db!: sqlite3.Database;
private log: Logger = getLogger();
private log: Logger = getLogger('SqlDB');
private initTask!: Deferred<void>;

public init(createNew: boolean, dbDir: string): Promise<void> {
Expand Down Expand Up @@ -130,7 +130,7 @@ class SqlDB implements Database {
exp.nextSequenceId,
exp.revision
];
this.log.trace(`storeExperimentProfile: SQL: ${sql}, args: ${JSON.stringify(args)}`);
this.log.trace(`storeExperimentProfile: SQL: ${sql}, args:`, args);
const deferred: Deferred<void> = new Deferred<void>();
this.db.run(sql, args, (err: Error | null) => { this.resolve(deferred, err); });

Expand All @@ -147,7 +147,7 @@ class SqlDB implements Database {
sql = 'select * from ExperimentProfile where id=? and revision=?';
args = [experimentId, revision];
}
this.log.trace(`queryExperimentProfile: SQL: ${sql}, args: ${JSON.stringify(args)}`);
this.log.trace(`queryExperimentProfile: SQL: ${sql}, args:`, args);
const deferred: Deferred<ExperimentProfile[]> = new Deferred<ExperimentProfile[]>();
this.db.all(sql, args, (err: Error | null, rows: any[]) => {
this.resolve(deferred, err, rows, loadExperimentProfile);
Expand All @@ -170,7 +170,7 @@ class SqlDB implements Database {
const message: string | undefined = jobDetail === undefined ? undefined : jobDetail.message;
const args: any[] = [timestamp, trialJobId, event, hyperParameter, logPath, sequenceId, message];

this.log.trace(`storeTrialJobEvent: SQL: ${sql}, args: ${JSON.stringify(args)}`);
this.log.trace(`storeTrialJobEvent: SQL: ${sql}, args:`, args);
const deferred: Deferred<void> = new Deferred<void>();
this.db.run(sql, args, (err: Error | null) => { this.resolve(deferred, err); });

Expand All @@ -193,7 +193,7 @@ class SqlDB implements Database {
args = [trialJobId, event];
}

this.log.trace(`queryTrialJobEvent: SQL: ${sql}, args: ${JSON.stringify(args)}`);
this.log.trace(`queryTrialJobEvent: SQL: ${sql}, args:`, args);
const deferred: Deferred<TrialJobEventRecord[]> = new Deferred<TrialJobEventRecord[]>();
this.db.all(sql, args, (err: Error | null, rows: any[]) => {
this.resolve(deferred, err, rows, loadTrialJobEvent);
Expand All @@ -207,7 +207,7 @@ class SqlDB implements Database {
const json: MetricDataRecord = JSON.parse(data);
const args: any[] = [Date.now(), json.trialJobId, json.parameterId, json.type, json.sequence, JSON.stringify(json.data)];

this.log.trace(`storeMetricData: SQL: ${sql}, args: ${JSON.stringify(args)}`);
this.log.trace(`storeMetricData: SQL: ${sql}, args:`, args);
const deferred: Deferred<void> = new Deferred<void>();
this.db.run(sql, args, (err: Error | null) => { this.resolve(deferred, err); });

Expand All @@ -230,7 +230,7 @@ class SqlDB implements Database {
args = [trialJobId, metricType];
}

this.log.trace(`queryMetricData: SQL: ${sql}, args: ${JSON.stringify(args)}`);
this.log.trace(`queryMetricData: SQL: ${sql}, args:`, args);
const deferred: Deferred<MetricDataRecord[]> = new Deferred<MetricDataRecord[]>();
this.db.all(sql, args, (err: Error | null, rows: any[]) => {
this.resolve(deferred, err, rows, loadMetricData);
Expand Down Expand Up @@ -259,7 +259,7 @@ class SqlDB implements Database {
for (const row of (<any[]>rows)) {
data.push(rowLoader(row));
}
this.log.trace(`sql query result: ${JSON.stringify(data)}`);
this.log.trace(`sql query result:`, data);
(<Deferred<T[]>>deferred).resolve(data);
}
}
Expand Down
4 changes: 2 additions & 2 deletions ts/nni_manager/rest_server/restHandler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,14 @@ class NNIRestHandler {
this.experimentsManager = component.get(ExperimentManager);
this.tensorboardManager = component.get(TensorboardManager);
this.restServer = rs;
this.log = getLogger();
this.log = getLogger('NNIRestHandler');
}

public createRestHandler(): Router {
const router: Router = Router();

router.use((req: Request, res: Response, next) => {
this.log.debug(`${req.method}: ${req.url}: body:\n${JSON.stringify(req.body, undefined, 4)}`);
this.log.debug(`${req.method}: ${req.url}: body:`, req.body);
res.header('Access-Control-Allow-Headers', 'Origin, X-Requested-With, Content-Type, Accept');
res.header('Access-Control-Allow-Methods', 'PUT,POST,GET,DELETE,OPTIONS');

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ export abstract class ClusterJobRestServer extends RestServer {
const router: Router = Router();

router.use((req: Request, res: Response, next: any) => {
this.log.info(`${req.method}: ${req.url}: body:\n${JSON.stringify(req.body, undefined, 4)}`);
this.log.info(`${req.method}: ${req.url}: body:`, req.body);
res.setHeader('Content-Type', 'application/json');
next();
});
Expand Down Expand Up @@ -109,7 +109,7 @@ export abstract class ClusterJobRestServer extends RestServer {
router.post(`/update-metrics/${this.expId}/:trialId`, (req: Request, res: Response) => {
try {
this.log.info(`Get update-metrics request, trial job id is ${req.params.trialId}`);
this.log.info(`update-metrics body is ${JSON.stringify(req.body)}`);
this.log.info('update-metrics body is', req.body);

this.handleTrialMetrics(req.body.jobId, req.body.metrics);

Expand Down
2 changes: 1 addition & 1 deletion ts/nni_manager/training_service/common/util.ts
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ export async function execCopydir(source: string, destination: string): Promise<
await fs.promises.mkdir(destPath);
}
} else {
getLogger().debug(`Copying file from ${sourcePath} to ${destPath}`);
getLogger('execCopydir').debug(`Copying file from ${sourcePath} to ${destPath}`);
await fs.promises.copyFile(sourcePath, destPath);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ export namespace AzureStorageClientUtility {
const deferred: Deferred<boolean> = new Deferred<boolean>();
fileServerClient.createShareIfNotExists(azureShare, (error: any, _result: any, _response: any) => {
if (error) {
getLogger()
getLogger('AzureStorageClientUtility')
.error(`Create share failed:, ${error}`);
deferred.resolve(false);
} else {
Expand All @@ -43,7 +43,7 @@ export namespace AzureStorageClientUtility {
const deferred: Deferred<boolean> = new Deferred<boolean>();
fileServerClient.createDirectoryIfNotExists(azureShare, azureFoler, (error: any, _result: any, _response: any) => {
if (error) {
getLogger()
getLogger('AzureStorageClientUtility')
.error(`Create directory failed:, ${error}`);
deferred.resolve(false);
} else {
Expand Down Expand Up @@ -91,7 +91,7 @@ export namespace AzureStorageClientUtility {
await fileServerClient.createFileFromLocalFile(azureShare, azureDirectory, azureFileName, localFilePath,
(error: any, _result: any, _response: any) => {
if (error) {
getLogger()
getLogger('AzureStorageClientUtility')
.error(`Upload file failed:, ${error}`);
deferred.resolve(false);
} else {
Expand All @@ -116,7 +116,7 @@ export namespace AzureStorageClientUtility {
await fileServerClient.getFileToStream(azureShare, azureDirectory, azureFileName, fs.createWriteStream(localFilePath),
(error: any, _result: any, _response: any) => {
if (error) {
getLogger()
getLogger('AzureStorageClientUtility')
.error(`Download file failed:, ${error}`);
deferred.resolve(false);
} else {
Expand Down Expand Up @@ -185,19 +185,19 @@ export namespace AzureStorageClientUtility {
fileServerClient.listFilesAndDirectoriesSegmented(azureShare, azureDirectory, 'null',
async (_error: any, result: any, _response: any) => {
if (('entries' in result) === false) {
getLogger()
getLogger('AzureStorageClientUtility')
.error(`list files failed, can't get entries in result`);
throw new Error(`list files failed, can't get entries in result`);
}

if (('files' in result.entries) === false) {
getLogger()
getLogger('AzureStorageClientUtility')
.error(`list files failed, can't get files in result['entries']`);
throw new Error(`list files failed, can't get files in result['entries']`);
}

if (('directories' in result.directories) === false) {
getLogger()
getLogger('AzureStorageClientUtility')
.error(`list files failed, can't get directories in result['entries']`);
throw new Error(`list files failed, can't get directories in result['entries']`);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import {getLogger, Logger} from '../../common/log';
*/
class GeneralK8sClient {
protected readonly client: any;
protected readonly log: Logger = getLogger();
protected readonly log: Logger = getLogger('GeneralK8sClient');
protected namespace: string = 'default';

constructor() {
Expand Down Expand Up @@ -135,7 +135,7 @@ class GeneralK8sClient {
*/
abstract class KubernetesCRDClient {
protected readonly client: any;
protected readonly log: Logger = getLogger();
protected readonly log: Logger = getLogger('KubernetesCRDClient');
protected crdSchema: any;

constructor() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ import { KubernetesTrialJobDetail } from './kubernetesData';
*/
export class KubernetesJobInfoCollector {
protected readonly trialJobsMap: Map<string, KubernetesTrialJobDetail>;
protected readonly log: Logger = getLogger();
protected readonly log: Logger = getLogger('KubernetesJobInfoCollector');
protected readonly statusesNeedToCheck: TrialJobStatus[];

constructor(jobMap: Map<string, KubernetesTrialJobDetail>) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ abstract class KubernetesTrainingService {
protected expContainerCodeFolder: string;

constructor() {
this.log = getLogger();
this.log = getLogger('KubernetesTrainingService');
this.metricsEmitter = new EventEmitter();
this.trialJobsMap = new Map<string, KubernetesTrialJobDetail>();
this.trialLocalTempFolder = path.join(getExperimentRootDir(), 'trials-nfs-tmp');
Expand Down
2 changes: 1 addition & 1 deletion ts/nni_manager/training_service/local/gpuScheduler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class GPUScheduler {

constructor() {
this.stopping = false;
this.log = getLogger();
this.log = getLogger('GPUScheduler');
this.gpuMetricCollectorScriptFolder = `${os.tmpdir()}/${os.userInfo().username}/nni/script`;
}

Expand Down
4 changes: 2 additions & 2 deletions ts/nni_manager/training_service/local/localTrainingService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ class LocalTrainingService implements TrainingService {
this.jobMap = new Map<string, LocalTrialJobDetail>();
this.jobQueue = [];
this.stopping = false;
this.log = getLogger();
this.log = getLogger('LocalTrainingService');
this.experimentId = getExperimentId();
this.jobStreamMap = new Map<string, ts.Stream>();
this.log.info('Construct local machine training service.');
Expand Down Expand Up @@ -204,7 +204,7 @@ class LocalTrainingService implements TrainingService {
this.jobQueue.push(trialJobId);
this.jobMap.set(trialJobId, trialJobDetail);

this.log.debug(`submitTrialJob: return: ${JSON.stringify(trialJobDetail)} `);
this.log.debug('submitTrialJob: return:', trialJobDetail);

return Promise.resolve(trialJobDetail);
}
Expand Down
2 changes: 1 addition & 1 deletion ts/nni_manager/training_service/pai/paiJobInfoCollector.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ interface FlattenOpenpaiConfig extends ExperimentConfig, OpenpaiConfig { }
*/
export class PAIJobInfoCollector {
private readonly trialJobsMap: Map<string, PAITrialJobDetail>;
private readonly log: Logger = getLogger();
private readonly log: Logger = getLogger('PAIJobInfoCollector');
private readonly statusesNeedToCheck: TrialJobStatus[];
private readonly finalStatuses: TrialJobStatus[];

Expand Down
2 changes: 1 addition & 1 deletion ts/nni_manager/training_service/pai/paiJobRestServer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ export class PAIJobRestServer extends ClusterJobRestServer {

router.post(`/parameter-file-meta`, (req: Request, res: Response) => {
try {
this.log.info(`POST /parameter-file-meta, body is ${JSON.stringify(req.body)}`);
this.log.info('POST /parameter-file-meta, body is', req.body);
this.parameterFileMetaList.push(req.body);
res.send();
} catch (err) {
Expand Down
4 changes: 2 additions & 2 deletions ts/nni_manager/training_service/pai/paiTrainingService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ class PAITrainingService implements TrainingService {
private config: FlattenOpenpaiConfig;

constructor(config: ExperimentConfig) {
this.log = getLogger();
this.log = getLogger('PAITrainingService');
this.metricsEmitter = new EventEmitter();
this.trialJobsMap = new Map<string, PAITrialJobDetail>();
this.jobQueue = [];
Expand Down Expand Up @@ -308,7 +308,7 @@ class PAITrainingService implements TrainingService {
}

public async submitTrialJob(form: TrialJobApplicationForm): Promise<TrialJobDetail> {
this.log.info(`submitTrialJob: form: ${JSON.stringify(form)}`);
this.log.info('submitTrialJob: form:', form);

const trialJobId: string = uniqueString(5);
//TODO: use HDFS working folder instead
Expand Down
Loading

0 comments on commit 521f191

Please sign in to comment.