From 0fb3ec96f7a589881d1f7c501e983e755ddbb8de Mon Sep 17 00:00:00 2001 From: Santiago Gimeno Date: Fri, 18 Dec 2015 19:13:50 +0100 Subject: [PATCH] cluster: fix race condition setting suicide prop There is no guarantee that the `suicide` property of a worker in the master process is going to be set when the `disconnect` and `exit` events are emitted. To fix it, wait for the ACK of the suicide message from the master before disconnecting the worker. Also, there's no need to send the suicide message from the worker if the disconnection has been initiated in the master. Add `test-cluster-disconnect-suicide-race` that forks a lot of workers to consistently reproduce the issue this patch tries to solve. Modify `test-regress-GH-3238` so it checks both the `kill` and `disconnect` cases. Also take into account that the `disconnect` event may be received after the `exit` event. --- lib/cluster.js | 48 ++++++++++++------- test/parallel/test-regress-GH-3238.js | 23 ++++----- .../test-cluster-disconnect-suicide-race.js | 32 +++++++++++++ 3 files changed, 75 insertions(+), 28 deletions(-) create mode 100644 test/sequential/test-cluster-disconnect-suicide-race.js diff --git a/lib/cluster.js b/lib/cluster.js index 8356fad88a60d0..c8bf658d5b32a8 100644 --- a/lib/cluster.js +++ b/lib/cluster.js @@ -434,7 +434,7 @@ function masterInit() { else if (message.act === 'listening') listening(worker, message); else if (message.act === 'suicide') - worker.suicide = true; + suicide(worker, message); else if (message.act === 'close') close(worker, message); } @@ -445,6 +445,11 @@ function masterInit() { cluster.emit('online', worker); } + function suicide(worker, message) { + worker.suicide = true; + send(worker, { ack: message.seq }); + } + function queryServer(worker, message) { // Stop processing if worker already disconnecting if (worker.suicide) @@ -541,7 +546,7 @@ function workerInit() { if (message.act === 'newconn') onconnection(message, handle); else if (message.act === 'disconnect') - worker.disconnect(); + _disconnect.call(worker, true); } }; @@ -662,14 +667,36 @@ function workerInit() { } Worker.prototype.disconnect = function() { + _disconnect.call(this); + }; + + Worker.prototype.destroy = function() { + this.suicide = true; + if (!this.isConnected()) process.exit(0); + var exit = process.exit.bind(null, 0); + send({ act: 'suicide' }, () => process.disconnect()); + process.once('disconnect', exit); + }; + + function send(message, cb) { + sendHelper(process, message, null, cb); + } + + function _disconnect(masterInitiated) { this.suicide = true; let waitingCount = 1; function checkWaitingCount() { waitingCount--; if (waitingCount === 0) { - send({ act: 'suicide' }); - process.disconnect(); + // If disconnect is worker initiated, wait for ack to be sure suicide + // is properly set in the master, otherwise, if it's master initiated + // there's no need to send the suicide message + if (masterInitiated) { + process.disconnect(); + } else { + send({ act: 'suicide' }, () => process.disconnect()); + } } } @@ -681,19 +708,6 @@ function workerInit() { } checkWaitingCount(); - }; - - Worker.prototype.destroy = function() { - this.suicide = true; - if (!this.isConnected()) process.exit(0); - var exit = process.exit.bind(null, 0); - send({ act: 'suicide' }, exit); - process.once('disconnect', exit); - process.disconnect(); - }; - - function send(message, cb) { - sendHelper(process, message, null, cb); } } diff --git a/test/parallel/test-regress-GH-3238.js b/test/parallel/test-regress-GH-3238.js index a92a09db5fe4b1..e6fe030bda9a10 100644 --- a/test/parallel/test-regress-GH-3238.js +++ b/test/parallel/test-regress-GH-3238.js @@ -4,18 +4,19 @@ const assert = require('assert'); const cluster = require('cluster'); if (cluster.isMaster) { - const worker = cluster.fork(); - let disconnected = false; + function forkWorker(action) { + const worker = cluster.fork({ action }); + worker.on('disconnect', common.mustCall(() => { + assert.strictEqual(worker.suicide, true); + })); - worker.on('disconnect', common.mustCall(function() { - assert.strictEqual(worker.suicide, true); - disconnected = true; - })); + worker.on('exit', common.mustCall(() => { + assert.strictEqual(worker.suicide, true); + })); + } - worker.on('exit', common.mustCall(function() { - assert.strictEqual(worker.suicide, true); - assert.strictEqual(disconnected, true); - })); + forkWorker('disconnect'); + forkWorker('kill'); } else { - cluster.worker.disconnect(); + cluster.worker[process.env.action](); } diff --git a/test/sequential/test-cluster-disconnect-suicide-race.js b/test/sequential/test-cluster-disconnect-suicide-race.js new file mode 100644 index 00000000000000..e05c420e1fdd9b --- /dev/null +++ b/test/sequential/test-cluster-disconnect-suicide-race.js @@ -0,0 +1,32 @@ +'use strict'; +const common = require('../common'); +const assert = require('assert'); +const cluster = require('cluster'); +const os = require('os'); + +if (cluster.isMaster) { + function forkWorker(action) { + const worker = cluster.fork({ action }); + worker.on('disconnect', common.mustCall(() => { + assert.strictEqual(worker.suicide, true); + })); + + worker.on('exit', common.mustCall(() => { + assert.strictEqual(worker.suicide, true); + })); + } + + const cpus = os.cpus().length; + const tries = cpus > 8 ? 64 : cpus * 8; + + cluster.on('exit', common.mustCall((worker, code) => { + assert.strictEqual(code, 0, 'worker exited with error'); + }, tries * 2)); + + for (let i = 0; i < tries; ++i) { + forkWorker('disconnect'); + forkWorker('kill'); + } +} else { + cluster.worker[process.env.action](); +}