From e9f832df3603397a4a672d3c54fa8413be9ba0d9 Mon Sep 17 00:00:00 2001 From: J-shang <33053116+J-shang@users.noreply.github.com> Date: Tue, 15 Dec 2020 16:37:53 +0800 Subject: [PATCH] change SIGKILL to SIGTERM in local mode cancel trial job (#3173) --- .../local/localTrainingService.ts | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/ts/nni_manager/training_service/local/localTrainingService.ts b/ts/nni_manager/training_service/local/localTrainingService.ts index 60ed832cf1..d21f6d71ec 100644 --- a/ts/nni_manager/training_service/local/localTrainingService.ts +++ b/ts/nni_manager/training_service/local/localTrainingService.ts @@ -253,7 +253,20 @@ class LocalTrainingService implements TrainingService { return Promise.resolve(); } - tkill(trialJob.pid, 'SIGKILL'); + tkill(trialJob.pid, 'SIGTERM'); + const startTime = Date.now(); + while(await isAlive(trialJob.pid)) { + if (Date.now() - startTime > 4999) { + tkill(trialJob.pid, 'SIGKILL', (err) => { + if (err) { + this.log.error(`kill trial job error: ${err}`); + } + }); + break; + } + await delay(500); + } + this.setTrialJobStatus(trialJob, getJobCancelStatus(isEarlyStopped)); return Promise.resolve();