Skip to content

Commit

Permalink
buffer: optimize writing short strings
Browse files Browse the repository at this point in the history
PR-URL: nodejs#54310
  • Loading branch information
ronag committed Aug 11, 2024
1 parent 298ff4f commit 1091051
Show file tree
Hide file tree
Showing 4 changed files with 309 additions and 324 deletions.
20 changes: 20 additions & 0 deletions benchmark/buffers/buffer-write-string-short.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
'use strict';

const common = require('../common.js');

// Parameter matrix handed to the benchmark harness:
//   encoding - value passed as the third argument of buf.write(); the empty
//              string is presumably there to cover the "no explicit
//              encoding" path (confirm against Buffer.prototype.write).
//   len      - length of the written string and of the target buffer; all
//              values are small because this benchmark targets short writes.
//   n        - number of buf.write() calls per run.
const options = {
  encoding: ['', 'utf8', 'ascii', 'latin1'],
  len: [0, 1, 8, 16, 32],
  n: [1e6],
};

const bench = common.createBenchmark(main, options);

/**
 * Benchmark body: repeatedly writes one short string at offset 0 of a buffer
 * of the same length, timing only the write loop.
 *
 * @param {object} conf Benchmark configuration.
 * @param {number} conf.len Length of both the string and the target buffer.
 * @param {number} conf.n Number of `buf.write()` calls to time.
 * @param {string} conf.encoding Encoding forwarded to `buf.write()`.
 */
function main({ len, n, encoding }) {
  const buf = Buffer.allocUnsafe(len);
  // The string is round-tripped through a Buffer instead of using the result
  // of repeat() directly.
  // NOTE(review): presumably this yields a freshly materialized string so the
  // string's internal representation does not skew the timing - confirm.
  const string = Buffer.from('a'.repeat(len)).toString();

  bench.start();
  for (let i = 0; i < n; ++i) {
    buf.write(string, 0, encoding);
  }
  bench.end(n);
}
155 changes: 110 additions & 45 deletions lib/buffer.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,13 @@

const {
Array,
ArrayBufferIsView,
ArrayFrom,
ArrayIsArray,
ArrayPrototypeForEach,
ArrayPrototypeIndexOf,
MathFloor,
MathMin,
MathTrunc,
NumberIsInteger,
NumberIsNaN,
NumberMAX_SAFE_INTEGER,
NumberMIN_SAFE_INTEGER,
Expand All @@ -43,10 +43,10 @@ const {
StringPrototypeTrim,
SymbolSpecies,
SymbolToPrimitive,
TypedArrayPrototypeFill,
TypedArrayPrototypeGetBuffer,
TypedArrayPrototypeGetByteLength,
TypedArrayPrototypeGetByteOffset,
TypedArrayPrototypeFill,
TypedArrayPrototypeGetLength,
TypedArrayPrototypeSet,
TypedArrayPrototypeSlice,
Expand All @@ -58,7 +58,6 @@ const {
byteLengthUtf8,
compare: _compare,
compareOffset,
copy: _copy,
createFromString,
fill: bindingFill,
isAscii: bindingIsAscii,
Expand All @@ -71,9 +70,10 @@ const {
swap64: _swap64,
kMaxLength,
kStringMaxLength,
atob: _atob,
btoa: _btoa,
} = internalBinding('buffer');

const bufferBinding = internalBinding('buffer');

const {
constants: {
ALL_PROPERTIES,
Expand All @@ -88,7 +88,6 @@ const {
normalizeEncoding,
kIsEncodingSymbol,
defineLazyProperties,
encodingsMap,
} = require('internal/util');
const {
isAnyArrayBuffer,
Expand All @@ -99,15 +98,16 @@ const {
const {
inspect: utilInspect,
} = require('internal/util/inspect');
const { encodings } = internalBinding('string_decoder');

const {
codes: {
ERR_BUFFER_OUT_OF_BOUNDS,
ERR_INVALID_ARG_TYPE,
ERR_INVALID_ARG_VALUE,
ERR_INVALID_BUFFER_SIZE,
ERR_MISSING_ARGS,
ERR_OUT_OF_RANGE,
ERR_MISSING_ARGS,
ERR_UNKNOWN_ENCODING,
},
genericNodeError,
Expand Down Expand Up @@ -152,6 +152,10 @@ const constants = ObjectDefineProperties({}, {
Buffer.poolSize = 8 * 1024;
let poolSize, poolOffset, allocPool;

// Lookup table from encoding name to its position in the `encodings` list.
// The null prototype means only the names stored below resolve on it.
const encodingsMap = { __proto__: null };
for (let index = 0; index < encodings.length; ++index) {
  const name = encodings[index];
  encodingsMap[name] = index;
}

function createPool() {
poolSize = Buffer.poolSize;
allocPool = createUnsafeBuffer(poolSize).buffer;
Expand Down Expand Up @@ -202,55 +206,55 @@ function toInteger(n, defaultVal) {
return defaultVal;
}

function copyImpl(source, target, targetStart, sourceStart, sourceEnd) {
if (!ArrayBufferIsView(source))
function _copy(source, target, targetStart, sourceStart, sourceEnd) {
if (!isUint8Array(source))
throw new ERR_INVALID_ARG_TYPE('source', ['Buffer', 'Uint8Array'], source);
if (!ArrayBufferIsView(target))
if (!isUint8Array(target))
throw new ERR_INVALID_ARG_TYPE('target', ['Buffer', 'Uint8Array'], target);

if (targetStart === undefined) {
targetStart = 0;
} else {
targetStart = NumberIsInteger(targetStart) ? targetStart : toInteger(targetStart, 0);
targetStart = toInteger(targetStart, 0);
if (targetStart < 0)
throw new ERR_OUT_OF_RANGE('targetStart', '>= 0', targetStart);
}

if (sourceStart === undefined) {
sourceStart = 0;
} else {
sourceStart = NumberIsInteger(sourceStart) ? sourceStart : toInteger(sourceStart, 0);
if (sourceStart < 0 || sourceStart > source.byteLength)
throw new ERR_OUT_OF_RANGE('sourceStart', `>= 0 && <= ${source.byteLength}`, sourceStart);
sourceStart = toInteger(sourceStart, 0);
if (sourceStart < 0 || sourceStart > source.length)
throw new ERR_OUT_OF_RANGE('sourceStart', `>= 0 && <= ${source.length}`, sourceStart);
}

if (sourceEnd === undefined) {
sourceEnd = source.byteLength;
sourceEnd = source.length;
} else {
sourceEnd = NumberIsInteger(sourceEnd) ? sourceEnd : toInteger(sourceEnd, 0);
sourceEnd = toInteger(sourceEnd, 0);
if (sourceEnd < 0)
throw new ERR_OUT_OF_RANGE('sourceEnd', '>= 0', sourceEnd);
}

if (targetStart >= target.byteLength || sourceStart >= sourceEnd)
if (targetStart >= target.length || sourceStart >= sourceEnd)
return 0;

return _copyActual(source, target, targetStart, sourceStart, sourceEnd);
}

function _copyActual(source, target, targetStart, sourceStart, sourceEnd) {
if (sourceEnd - sourceStart > target.byteLength - targetStart)
sourceEnd = sourceStart + target.byteLength - targetStart;
if (sourceEnd - sourceStart > target.length - targetStart)
sourceEnd = sourceStart + target.length - targetStart;

let nb = sourceEnd - sourceStart;
const sourceLen = source.byteLength - sourceStart;
const sourceLen = source.length - sourceStart;
if (nb > sourceLen)
nb = sourceLen;

if (nb <= 0)
return 0;
if (sourceStart !== 0 || sourceEnd < source.length)
source = new Uint8Array(source.buffer, source.byteOffset + sourceStart, nb);

_copy(source, target, targetStart, sourceStart, nb);
TypedArrayPrototypeSet(target, source, targetStart);

return nb;
}
Expand Down Expand Up @@ -620,7 +624,7 @@ const encodingOps = {
encoding: 'utf8',
encodingVal: encodingsMap.utf8,
byteLength: byteLengthUtf8,
write: (buf, string, offset, len) => buf.utf8Write(string, offset, len),
write: (buf, string, offset, len) => bufferBinding.utf8WriteStatic(buf, string, offset, len),
slice: (buf, start, end) => buf.utf8Slice(start, end),
indexOf: (buf, val, byteOffset, dir) =>
indexOfString(buf, val, byteOffset, encodingsMap.utf8, dir),
Expand All @@ -647,7 +651,7 @@ const encodingOps = {
encoding: 'latin1',
encodingVal: encodingsMap.latin1,
byteLength: (string) => string.length,
write: (buf, string, offset, len) => buf.latin1Write(string, offset, len),
write: (buf, string, offset, len) => bufferBinding.latin1WriteStatic(buf, string, offset, len),
slice: (buf, start, end) => buf.latin1Slice(start, end),
indexOf: (buf, val, byteOffset, dir) =>
indexOfString(buf, val, byteOffset, encodingsMap.latin1, dir),
Expand All @@ -656,7 +660,7 @@ const encodingOps = {
encoding: 'ascii',
encodingVal: encodingsMap.ascii,
byteLength: (string) => string.length,
write: (buf, string, offset, len) => buf.asciiWrite(string, offset, len),
write: (buf, string, offset, len) => bufferBinding.asciiWriteStatic(buf, string, offset, len),
slice: (buf, start, end) => buf.asciiSlice(start, end),
indexOf: (buf, val, byteOffset, dir) =>
indexOfBuffer(buf,
Expand Down Expand Up @@ -804,7 +808,7 @@ ObjectDefineProperty(Buffer.prototype, 'offset', {

Buffer.prototype.copy =
function copy(target, targetStart, sourceStart, sourceEnd) {
return copyImpl(this, target, targetStart, sourceStart, sourceEnd);
return _copy(this, target, targetStart, sourceStart, sourceEnd);
};

// No need to verify that "buf.length <= MAX_UINT32" since it's a read-only
Expand Down Expand Up @@ -1253,41 +1257,102 @@ function btoa(input) {
if (arguments.length === 0) {
throw new ERR_MISSING_ARGS('input');
}
const result = _btoa(`${input}`);
if (result === -1) {
throw lazyDOMException('Invalid character', 'InvalidCharacterError');
input = `${input}`;
for (let n = 0; n < input.length; n++) {
if (input[n].charCodeAt(0) > 0xff)
throw lazyDOMException('Invalid character', 'InvalidCharacterError');
}
return result;
const buf = Buffer.from(input, 'latin1');
return buf.toString('base64');
}

// Char codes accepted in forgiving-base64 input.
// Refs: https://infra.spec.whatwg.org/#forgiving-base64-decode
//
// The order is significant: the five ASCII whitespace codes come first (so an
// index of 4 or less identifies whitespace) and `=` is the final entry.
const kForgivingBase64AllowedChars = [
  // ASCII whitespace: TAB, LF, FF, CR, SPACE
  // Refs: https://infra.spec.whatwg.org/#ascii-whitespace
  0x09, 0x0A, 0x0C, 0x0D, 0x20,

  // 'A'..'Z' (0x41 is 'A')
  ...ArrayFrom({ length: 26 }, (_, offset) => 0x41 + offset),

  // 'a'..'z' (0x61 is 'a')
  ...ArrayFrom({ length: 26 }, (_, offset) => 0x61 + offset),

  // '0'..'9' (0x30 is '0')
  ...ArrayFrom({ length: 10 }, (_, offset) => 0x30 + offset),

  0x2B, // +
  0x2F, // /
  0x3D, // =
];

// Position of the `=` padding character within the table above.
const kEqualSignIndex = ArrayPrototypeIndexOf(
  kForgivingBase64AllowedChars,
  0x3D,
);

function atob(input) {
// The implementation here has not been performance optimized in any way and
// should not be.
// Refs: https://github.com/nodejs/node/pull/38433#issuecomment-828426932
if (arguments.length === 0) {
throw new ERR_MISSING_ARGS('input');
}

const result = _atob(`${input}`);
input = `${input}`;
let nonAsciiWhitespaceCharCount = 0;
let equalCharCount = 0;

for (let n = 0; n < input.length; n++) {
const index = ArrayPrototypeIndexOf(
kForgivingBase64AllowedChars,
StringPrototypeCharCodeAt(input, n));

if (index > 4) {
// The first 5 elements of `kForgivingBase64AllowedChars` are
// ASCII whitespace char codes.
nonAsciiWhitespaceCharCount++;

if (index === kEqualSignIndex) {
equalCharCount++;
} else if (equalCharCount) {
// The `=` char is only allowed at the end.
throw lazyDOMException('Invalid character', 'InvalidCharacterError');
}

switch (result) {
case -2: // Invalid character
if (equalCharCount > 2) {
// Only one more `=` is permitted after the first equal sign.
throw lazyDOMException('Invalid character', 'InvalidCharacterError');
}
} else if (index === -1) {
throw lazyDOMException('Invalid character', 'InvalidCharacterError');
case -1: // Single character remained
throw lazyDOMException(
'The string to be decoded is not correctly encoded.',
'InvalidCharacterError');
case -3: // Possible overflow
// TODO(@anonrig): Throw correct error in here.
throw lazyDOMException('The input causes overflow.', 'InvalidCharacterError');
default:
return result;
}
}

let reminder = nonAsciiWhitespaceCharCount % 4;

// See #2, #3, #4 - https://infra.spec.whatwg.org/#forgiving-base64
if (!reminder) {
// Remove all trailing `=` characters and get the new reminder.
reminder = (nonAsciiWhitespaceCharCount - equalCharCount) % 4;
} else if (equalCharCount) {
// `=` should not be in the input if there's a reminder.
throw lazyDOMException('Invalid character', 'InvalidCharacterError');
}

// See #3 - https://infra.spec.whatwg.org/#forgiving-base64
if (reminder === 1) {
throw lazyDOMException(
'The string to be decoded is not correctly encoded.',
'InvalidCharacterError');
}

return Buffer.from(input, 'base64').toString('latin1');
}

function isUtf8(input) {
if (isTypedArray(input) || isAnyArrayBuffer(input)) {
return bindingIsUtf8(input);
}

throw new ERR_INVALID_ARG_TYPE('input', ['ArrayBuffer', 'Buffer', 'TypedArray'], input);
throw new ERR_INVALID_ARG_TYPE('input', ['TypedArray', 'Buffer'], input);
}

function isAscii(input) {
Expand Down
Loading

0 comments on commit 1091051

Please sign in to comment.