diff --git a/test/parallel/test-cluster-master-error.js b/test/parallel/test-cluster-master-error.js index ae0f655bb8e5d0..c5f9687d94db6b 100644 --- a/test/parallel/test-cluster-master-error.js +++ b/test/parallel/test-cluster-master-error.js @@ -92,15 +92,15 @@ if (cluster.isWorker) { // Check that the cluster died accidently existMaster = !!code; - // Give the workers time to shut down - var timeout = 200; - if (common.isAix) { - // AIX needs more time due to default exit performance - timeout = 1000; - } - setTimeout(checkWorkers, timeout); - - function checkWorkers() { + // Give the workers time to shut down. + // Since the parent process of the workers does not clean up after the + // forked children they will end up as zombie processes. pid 1 (init) will + // reparent those and eventually reap them. This normally takes <200ms but + // some init systems, like AIX and busybox init, need a bit more. + var timeout = 1000; + + var waitWorker = setInterval(function() { + timeout -= 10; // When master is dead all workers should be dead to var alive = false; workers.forEach(function(pid) { @@ -111,7 +111,10 @@ if (cluster.isWorker) { // If a worker was alive this did not act as expected existWorker = !alive; - } + + if (!alive || (timeout <= 0)) + clearInterval(waitWorker); + }, 10); }); process.once('exit', function() { diff --git a/test/parallel/test-cluster-master-kill.js b/test/parallel/test-cluster-master-kill.js index 32f22b0f968b7b..daec20a80e9b04 100644 --- a/test/parallel/test-cluster-master-kill.js +++ b/test/parallel/test-cluster-master-kill.js @@ -34,6 +34,10 @@ if (cluster.isWorker) { var isAlive = function(pid) { try { //this will throw an error if the process is dead + // note that the master does not clean up after the children so + // even dead children will return as 'alive' zombies here. + // when master exits pid 1 (init) will take over as parent for the + // zombie children and will eventually reap them. 
process.kill(pid, 0); return true; @@ -58,21 +62,25 @@ if (cluster.isWorker) { // make sure that the master died by purpose assert.equal(code, 0); - // check worker process status - var timeout = 200; - if (common.isAix) { - // AIX needs more time due to default exit performance - timeout = 1000; - } - setTimeout(function() { + // wait for init (pid 1) to collect the worker process + // normally 200ms is enough, but it depends on the init (pid 1) + // implementation. AIX's init and busybox init need more. We wait + // up to 1 sec (1000ms) before we trigger an error. + var timeout = 1000; + + var waitWorker = setInterval(function() { + timeout -= 10; alive = isAlive(pid); - }, timeout); + if (!alive || (timeout <= 0)) + clearInterval(waitWorker); + }, 10); }); process.once('exit', function() { // cleanup: kill the worker if alive if (alive) { process.kill(pid); + // we need waitpid(pid) here to avoid a zombie worker } assert.equal(typeof pid, 'number', 'did not get worker pid info');