Skip to content

Commit

Permalink
cluster: fix race condition setting suicide prop
Browse files Browse the repository at this point in the history
There is no guarantee that the `suicide` property of a worker in the
master process is going to be set when the `disconnect` and `exit`
events are emitted.

To fix it, wait for the ACK of the suicide message from the master
before disconnecting the worker. Also, there's no need to send the
suicide message from the worker if the disconnection has been
initiated in the master.

Add `test-cluster-disconnect-suicide-race` that forks a lot of workers
to consistently reproduce the issue this patch tries to solve.

Modify `test-regress-GH-3238` so it checks both the `kill` and
`disconnect` cases. Also take into account that the `disconnect` event
may be received after the `exit` event.

PR-URL: #4349
Reviewed-By: Colin Ihrig <cjihrig@gmail.com>
Reviewed-By: Rich Trott <rtrott@gmail.com>
  • Loading branch information
santigimeno authored and Myles Borins committed Feb 15, 2016
1 parent 48c2783 commit f8a676e
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 28 deletions.
48 changes: 31 additions & 17 deletions lib/cluster.js
Original file line number Diff line number Diff line change
Expand Up @@ -428,7 +428,7 @@ function masterInit() {
else if (message.act === 'listening')
listening(worker, message);
else if (message.act === 'suicide')
worker.suicide = true;
suicide(worker, message);
else if (message.act === 'close')
close(worker, message);
}
Expand All @@ -439,6 +439,11 @@ function masterInit() {
cluster.emit('online', worker);
}

function suicide(worker, message) {
worker.suicide = true;
send(worker, { ack: message.seq });
}

function queryServer(worker, message) {
var args = [message.address,
message.port,
Expand Down Expand Up @@ -532,7 +537,7 @@ function workerInit() {
if (message.act === 'newconn')
onconnection(message, handle);
else if (message.act === 'disconnect')
worker.disconnect();
_disconnect.call(worker, true);
}
};

Expand Down Expand Up @@ -653,14 +658,36 @@ function workerInit() {
}

Worker.prototype.disconnect = function() {
_disconnect.call(this);
};

Worker.prototype.destroy = function() {
this.suicide = true;
if (!this.isConnected()) process.exit(0);
var exit = process.exit.bind(null, 0);
send({ act: 'suicide' }, () => process.disconnect());
process.once('disconnect', exit);
};

function send(message, cb) {
sendHelper(process, message, null, cb);
}

function _disconnect(masterInitiated) {
this.suicide = true;
let waitingCount = 1;

function checkWaitingCount() {
waitingCount--;
if (waitingCount === 0) {
send({ act: 'suicide' });
process.disconnect();
// If disconnect is worker initiated, wait for ack to be sure suicide
// is properly set in the master, otherwise, if it's master initiated
// there's no need to send the suicide message
if (masterInitiated) {
process.disconnect();
} else {
send({ act: 'suicide' }, () => process.disconnect());
}
}
}

Expand All @@ -672,19 +699,6 @@ function workerInit() {
}

checkWaitingCount();
};

Worker.prototype.destroy = function() {
this.suicide = true;
if (!this.isConnected()) process.exit(0);
var exit = process.exit.bind(null, 0);
send({ act: 'suicide' }, exit);
process.once('disconnect', exit);
process.disconnect();
};

function send(message, cb) {
sendHelper(process, message, null, cb);
}
}

Expand Down
23 changes: 12 additions & 11 deletions test/parallel/test-regress-GH-3238.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,19 @@ const assert = require('assert');
const cluster = require('cluster');

if (cluster.isMaster) {
const worker = cluster.fork();
let disconnected = false;
function forkWorker(action) {
const worker = cluster.fork({ action });
worker.on('disconnect', common.mustCall(() => {
assert.strictEqual(worker.suicide, true);
}));

worker.on('disconnect', common.mustCall(function() {
assert.strictEqual(worker.suicide, true);
disconnected = true;
}));
worker.on('exit', common.mustCall(() => {
assert.strictEqual(worker.suicide, true);
}));
}

worker.on('exit', common.mustCall(function() {
assert.strictEqual(worker.suicide, true);
assert.strictEqual(disconnected, true);
}));
forkWorker('disconnect');
forkWorker('kill');
} else {
cluster.worker.disconnect();
cluster.worker[process.env.action]();
}
32 changes: 32 additions & 0 deletions test/sequential/test-cluster-disconnect-suicide-race.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
'use strict';
const common = require('../common');
const assert = require('assert');
const cluster = require('cluster');
const os = require('os');

if (cluster.isMaster) {
function forkWorker(action) {
const worker = cluster.fork({ action });
worker.on('disconnect', common.mustCall(() => {
assert.strictEqual(worker.suicide, true);
}));

worker.on('exit', common.mustCall(() => {
assert.strictEqual(worker.suicide, true);
}));
}

const cpus = os.cpus().length;
const tries = cpus > 8 ? 64 : cpus * 8;

cluster.on('exit', common.mustCall((worker, code) => {
assert.strictEqual(code, 0, 'worker exited with error');
}, tries * 2));

for (let i = 0; i < tries; ++i) {
forkWorker('disconnect');
forkWorker('kill');
}
} else {
cluster.worker[process.env.action]();
}

0 comments on commit f8a676e

Please sign in to comment.