fs: partition readFile to avoid threadpool exhaustion #17054

Closed · wants to merge 1 commit
86 changes: 86 additions & 0 deletions benchmark/fs/readfile-partitioned.js
@@ -0,0 +1,86 @@
// Submit a mix of short and long jobs to the threadpool.
// Report total job throughput.
// If we partition the long job, overall job throughput goes up significantly.
// However, this comes at the cost of the long job throughput.
//
// Short jobs: small zip jobs.
// Long jobs: fs.readFile on a large file.

'use strict';

const path = require('path');
const common = require('../common.js');
const filename = path.resolve(__dirname,
                              `.removeme-benchmark-garbage-${process.pid}`);
const fs = require('fs');
const zlib = require('zlib');
const assert = require('assert');

const bench = common.createBenchmark(main, {
  dur: [5],
  len: [1024, 16 * 1024 * 1024],
  concurrent: [1, 10]
});

function main(conf) {
  const len = +conf.len;
  try { fs.unlinkSync(filename); } catch (e) {}
  var data = Buffer.alloc(len, 'x');
  fs.writeFileSync(filename, data);
  data = null;

  var zipData = Buffer.alloc(1024, 'a');

  var reads = 0;
  var zips = 0;
  var benchEnded = false;
  bench.start();
  setTimeout(function() {
    const totalOps = reads + zips;
    benchEnded = true;
    bench.end(totalOps);
    try { fs.unlinkSync(filename); } catch (e) {}
  }, +conf.dur * 1000);

  function read() {
    fs.readFile(filename, afterRead);
  }

  function afterRead(er, data) {
    if (er) {
      if (er.code === 'ENOENT') {
        // Only OK if unlinked by the timer from main.
        assert.ok(benchEnded);
        return;
      }
      throw er;
    }

    if (data.length !== len)
      throw new Error('wrong number of bytes returned');

    reads++;
    if (!benchEnded)
      read();
  }

  function zip() {
    zlib.deflate(zipData, afterZip);
  }

  function afterZip(er, data) {
    if (er)
      throw er;

    zips++;
    if (!benchEnded)
      zip();
  }

  // Start reads
  var cur = +conf.concurrent;
  while (cur--) read();

  // Start a competing zip
  zip();
}
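For context (not part of the diff), here is a minimal sketch of the contention this benchmark measures. It assumes a pre-existing large file at the hypothetical path /tmp/big.bin and the default libuv threadpool size of 4; with an unpartitioned fs.readFile, the four large reads can occupy every threadpool thread, so the small zlib job may not start until one of them finishes.

'use strict';
const fs = require('fs');
const zlib = require('zlib');

const start = Date.now();

// Four long jobs: each fs.readFile holds a threadpool thread for its duration.
for (let i = 0; i < 4; i++) {
  fs.readFile('/tmp/big.bin', () => {
    console.log(`large read ${i} done after ${Date.now() - start} ms`);
  });
}

// One short job: zlib.deflate also runs on the threadpool and can be starved.
zlib.deflate(Buffer.alloc(1024, 'a'), () => {
  console.log(`small zlib job done after ${Date.now() - start} ms`);
});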
15 changes: 11 additions & 4 deletions benchmark/fs/readfile.js
@@ -8,6 +8,7 @@ const common = require('../common.js');
 const filename = path.resolve(process.env.NODE_TMPDIR || __dirname,
                               `.removeme-benchmark-garbage-${process.pid}`);
 const fs = require('fs');
+const assert = require('assert');

 const bench = common.createBenchmark(main, {
   dur: [5],
@@ -22,10 +23,10 @@ function main({ len, dur, concurrent }) {
   data = null;

   var reads = 0;
-  var bench_ended = false;
+  var benchEnded = false;
   bench.start();
   setTimeout(function() {
-    bench_ended = true;
+    benchEnded = true;
     bench.end(reads);
     try { fs.unlinkSync(filename); } catch (e) {}
     process.exit(0);
@@ -36,14 +37,20 @@
   }

   function afterRead(er, data) {
-    if (er)
+    if (er) {
+      if (er.code === 'ENOENT') {
+        // Only OK if unlinked by the timer from main.
+        assert.ok(benchEnded);
+        return;
+      }
       throw er;
+    }

     if (data.length !== len)
       throw new Error('wrong number of bytes returned');

     reads++;
-    if (!bench_ended)
+    if (!benchEnded)
       read();
   }

7 changes: 3 additions & 4 deletions doc/api/fs.md
@@ -2251,10 +2251,9 @@ Any specified file descriptor has to support reading.
 *Note*: If a file descriptor is specified as the `path`, it will not be closed
 automatically.

-*Note*: `fs.readFile()` reads the entire file in a single threadpool request.
-To minimize threadpool task length variation, prefer the partitioned APIs
-`fs.read()` and `fs.createReadStream()` when reading files as part of
-fulfilling a client request.
+*Note*: `fs.readFile()` buffers the entire file.
+To minimize memory costs, when possible prefer streaming via
+`fs.createReadStream()`.

## fs.readFileSync(path[, options])
<!-- YAML
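For reference, a minimal sketch of the streaming alternative the revised note recommends; the file path and byte-counting handlers below are illustrative only.

'use strict';
const fs = require('fs');

// Stream the file in chunks instead of buffering it all in memory at once.
const stream = fs.createReadStream('/path/to/large-file.bin');
let bytes = 0;
stream.on('data', (chunk) => { bytes += chunk.length; });
stream.on('end', () => { console.log(`read ${bytes} bytes`); });
stream.on('error', (err) => { throw err; });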
2 changes: 1 addition & 1 deletion lib/fs.js
@@ -508,7 +508,7 @@ ReadFileContext.prototype.read = function() {
   } else {
     buffer = this.buffer;
     offset = this.pos;
-    length = this.size - this.pos;
+    length = Math.min(kReadFileBufferLength, this.size - this.pos);
   }

   var req = new FSReqWrap();
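For readers following the lib/fs.js change above: kReadFileBufferLength caps how much each threadpool request reads, so a large file is handled as a sequence of smaller requests instead of one long one. Below is a rough user-land sketch of the same partitioning idea using fs.open() and fs.read(); the chunk size, file name, and helper function are illustrative, not Node's internals.

'use strict';
const fs = require('fs');

const kChunk = 512 * 1024; // illustrative chunk size, not Node's internal constant

function readFilePartitioned(file, cb) {
  fs.open(file, 'r', (err, fd) => {
    if (err) return cb(err);
    const chunks = [];
    function readChunk() {
      const buf = Buffer.allocUnsafe(kChunk);
      // Each fs.read() is its own threadpool request, so other work
      // (zlib, crypto, dns.lookup) can be scheduled between chunks.
      fs.read(fd, buf, 0, kChunk, null, (err, bytesRead) => {
        if (err) return fs.close(fd, () => cb(err));
        if (bytesRead === 0)
          return fs.close(fd, () => cb(null, Buffer.concat(chunks)));
        chunks.push(buf.slice(0, bytesRead));
        readChunk();
      });
    }
    readChunk();
  });
}

readFilePartitioned('/tmp/some-large-file.bin', (err, data) => {
  if (err) throw err;
  console.log(`read ${data.length} bytes in chunks of at most ${kChunk} bytes`);
});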
58 changes: 58 additions & 0 deletions test/parallel/test-fs-readfile.js
@@ -0,0 +1,58 @@
'use strict';
const common = require('../common');

// This test ensures that fs.readFile correctly returns the
// contents of varying-sized files.

const assert = require('assert');
const fs = require('fs');
const path = require('path');

const prefix = `.removeme-fs-readfile-${process.pid}`;

common.refreshTmpDir();

const fileInfo = [
  { name: path.join(common.tmpDir, `${prefix}-1K.txt`),
    len: 1024,
  },
  { name: path.join(common.tmpDir, `${prefix}-64K.txt`),
    len: 64 * 1024,
  },
  { name: path.join(common.tmpDir, `${prefix}-64KLessOne.txt`),
    len: (64 * 1024) - 1,
  },
  { name: path.join(common.tmpDir, `${prefix}-1M.txt`),
    len: 1 * 1024 * 1024,
  },
  { name: path.join(common.tmpDir, `${prefix}-1MPlusOne.txt`),
    len: (1 * 1024 * 1024) + 1,
  },
];

// Populate each fileInfo (and file) with unique fill.
const sectorSize = 512;
for (const e of fileInfo) {
  e.contents = Buffer.allocUnsafe(e.len);

  // This accounts for anything unusual in Node's implementation of readFile.
  // Using e.g. 'aa...aa' would miss bugs like Node re-reading
  // the same section twice instead of two separate sections.
  for (let offset = 0; offset < e.len; offset += sectorSize) {
    const fillByte = 256 * Math.random();
    const nBytesToFill = Math.min(sectorSize, e.len - offset);
    e.contents.fill(fillByte, offset, offset + nBytesToFill);
  }

  fs.writeFileSync(e.name, e.contents);
}
// All files are now populated.

// Test readFile on each size.
for (const e of fileInfo) {
  fs.readFile(e.name, common.mustCall((err, buf) => {
    console.log(`Validating readFile on file ${e.name} of length ${e.len}`);
    assert.ifError(err, 'An error occurred');
    assert.deepStrictEqual(buf, e.contents, 'Incorrect file contents');
  }));
}