Skip to content

Commit

Permalink
querystring: improve parse() performance
Browse files Browse the repository at this point in the history
This commit improves parse() performance by ~20-200% with the various
querystring-parse benchmarks.

Some optimization strategies used in this commit include:
* Combining multiple searches (for '&', '=', and '+') on the same
   string into a single loop
* Avoiding string.split()
* Minimizing creation of temporary strings
* Avoiding string decoding if no encoded bytes were found and the
   default string decoder is being used

PR-URL: #5012
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Roman Reiss <me@silverwind.io>
Reviewed-By: Matteo Collina <matteo.collina@gmail.com>
  • Loading branch information
mscdex committed Feb 13, 2016
1 parent 90451a6 commit a2a69a2
Show file tree
Hide file tree
Showing 3 changed files with 193 additions and 64 deletions.
42 changes: 32 additions & 10 deletions benchmark/querystring/querystring-parse.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,13 @@ var querystring = require('querystring');
var v8 = require('v8');

var bench = common.createBenchmark(main, {
type: ['noencode', 'encodemany', 'encodelast', 'multivalue'],
type: ['noencode',
'multicharsep',
'encodemany',
'encodelast',
'multivalue',
'multivaluemany',
'manypairs'],
n: [1e6],
});

Expand All @@ -13,22 +19,38 @@ function main(conf) {

var inputs = {
noencode: 'foo=bar&baz=quux&xyzzy=thud',
multicharsep: 'foo=bar&&&&&&&&&&baz=quux&&&&&&&&&&xyzzy=thud',
encodemany: '%66%6F%6F=bar&%62%61%7A=quux&xyzzy=%74h%75d',
encodelast: 'foo=bar&baz=quux&xyzzy=thu%64',
multivalue: 'foo=bar&foo=baz&foo=quux&quuy=quuz'
multivalue: 'foo=bar&foo=baz&foo=quux&quuy=quuz',
multivaluemany: 'foo=bar&foo=baz&foo=quux&quuy=quuz&foo=abc&foo=def&' +
'foo=ghi&foo=jkl&foo=mno&foo=pqr&foo=stu&foo=vwxyz',
manypairs: 'a&b&c&d&e&f&g&h&i&j&k&l&m&n&o&p&q&r&s&t&u&v&w&x&y&z'
};
var input = inputs[type];

// Force-optimize querystring.parse() so that the benchmark doesn't get
// disrupted by the optimizer kicking in halfway through.
for (var name in inputs)
querystring.parse(inputs[name]);

v8.setFlagsFromString('--allow_natives_syntax');
eval('%OptimizeFunctionOnNextCall(querystring.parse)');

bench.start();
for (var i = 0; i < n; i += 1)
if (type !== 'multicharsep') {
querystring.parse(input);
bench.end(n);
eval('%OptimizeFunctionOnNextCall(querystring.parse)');
querystring.parse(input);
} else {
querystring.parse(input, '&&&&&&&&&&');
eval('%OptimizeFunctionOnNextCall(querystring.parse)');
querystring.parse(input, '&&&&&&&&&&');
}

if (type !== 'multicharsep') {
bench.start();
for (var i = 0; i < n; i += 1)
querystring.parse(input);
bench.end(n);
} else {
bench.start();
for (var i = 0; i < n; i += 1)
querystring.parse(input, '&&&&&&&&&&');
bench.end(n);
}
}
212 changes: 158 additions & 54 deletions lib/querystring.js
Original file line number Diff line number Diff line change
Expand Up @@ -78,13 +78,14 @@ QueryString.unescapeBuffer = function(s, decodeSpaces) {
};


QueryString.unescape = function(s, decodeSpaces) {
function qsUnescape(s, decodeSpaces) {
try {
return decodeURIComponent(s);
} catch (e) {
return QueryString.unescapeBuffer(s, decodeSpaces).toString();
}
};
}
QueryString.unescape = qsUnescape;


var hexTable = new Array(256);
Expand Down Expand Up @@ -198,87 +199,190 @@ QueryString.stringify = QueryString.encode = function(obj, sep, eq, options) {
return '';
};

// Parse a key=val string.
// Parse a key/val string.
QueryString.parse = QueryString.decode = function(qs, sep, eq, options) {
sep = sep || '&';
eq = eq || '=';
const eqLen = eq.length;
var obj = {};

const obj = {};

if (typeof qs !== 'string' || qs.length === 0) {
return obj;
}

if (typeof sep !== 'string')
sep += '';

const eqLen = eq.length;
const sepLen = sep.length;

var maxKeys = 1000;
if (options && typeof options.maxKeys === 'number') {
maxKeys = options.maxKeys;
}

// maxKeys <= 0 means that we should not limit keys count
if (maxKeys > 0 && isFinite(maxKeys)) {
qs = qs.split(sep, maxKeys);
} else {
qs = qs.split(sep);
}

var len = qs.length;
var pairs = Infinity;
if (maxKeys > 0)
pairs = maxKeys;

var decode = QueryString.unescape;
if (options && typeof options.decodeURIComponent === 'function') {
decode = options.decodeURIComponent;
}

var keys = [];
for (var i = 0; i < len; ++i) {
// replacePlus() is used instead of a regexp because it is ~15-30% faster
// with v8 4.7
const x = replacePlus(qs[i]);
const idx = x.indexOf(eq);
var k, v;

if (idx >= 0) {
k = decodeStr(x.substring(0, idx), decode);
v = decodeStr(x.substring(idx + eqLen), decode);
const customDecode = (decode !== qsUnescape);

const keys = [];
var lastPos = 0;
var sepIdx = 0;
var eqIdx = 0;
var key = '';
var value = '';
var keyEncoded = customDecode;
var valEncoded = customDecode;
var encodeCheck = 0;
for (var i = 0; i < qs.length; ++i) {
const code = qs.charCodeAt(i);

// Try matching key/value pair separator (e.g. '&')
if (code === sep.charCodeAt(sepIdx)) {
if (++sepIdx === sepLen) {
// Key/value pair separator match!
const end = i - sepIdx + 1;
if (eqIdx < eqLen) {
// If we didn't find the key/value separator, treat the substring as
// part of the key instead of the value
if (lastPos < end)
key += qs.slice(lastPos, end);
} else if (lastPos < end)
value += qs.slice(lastPos, end);
if (keyEncoded)
key = decodeStr(key, decode);
if (valEncoded)
value = decodeStr(value, decode);
// Use a key array lookup instead of using hasOwnProperty(), which is
// slower
if (keys.indexOf(key) === -1) {
obj[key] = value;
keys[keys.length] = key;
} else {
const curValue = obj[key];
// `instanceof Array` is used instead of Array.isArray() because it
// is ~15-20% faster with v8 4.7 and is safe to use because we are
// using it with values being created within this function
if (curValue instanceof Array)
curValue[curValue.length] = value;
else
obj[key] = [curValue, value];
}
if (--pairs === 0)
break;
keyEncoded = valEncoded = customDecode;
encodeCheck = 0;
key = value = '';
lastPos = i + 1;
sepIdx = eqIdx = 0;
}
continue;
} else {
k = decodeStr(x, decode);
v = '';
sepIdx = 0;
if (!valEncoded) {
// Try to match an (valid) encoded byte (once) to minimize unnecessary
// calls to string decoding functions
if (code === 37/*%*/) {
encodeCheck = 1;
} else if (encodeCheck > 0 &&
((code >= 48/*0*/ && code <= 57/*9*/) ||
(code >= 65/*A*/ && code <= 70/*Z*/) ||
(code >= 97/*a*/ && code <= 102/*z*/))) {
if (++encodeCheck === 3)
valEncoded = true;
} else {
encodeCheck = 0;
}
}
}

// Use a key array lookup instead of using hasOwnProperty(), which is slower
if (keys.indexOf(k) === -1) {
obj[k] = v;
keys.push(k);
} else if (obj[k] instanceof Array) {
// `instanceof Array` is used instead of Array.isArray() because it is
// ~15-20% faster with v8 4.7 and is safe to use because we are using it
// with values being created within this function
obj[k].push(v);
// Try matching key/value separator (e.g. '=') if we haven't already
if (eqIdx < eqLen) {
if (code === eq.charCodeAt(eqIdx)) {
if (++eqIdx === eqLen) {
// Key/value separator match!
const end = i - eqIdx + 1;
if (lastPos < end)
key += qs.slice(lastPos, end);
encodeCheck = 0;
lastPos = i + 1;
}
continue;
} else {
eqIdx = 0;
if (!keyEncoded) {
// Try to match an (valid) encoded byte once to minimize unnecessary
// calls to string decoding functions
if (code === 37/*%*/) {
encodeCheck = 1;
} else if (encodeCheck > 0 &&
((code >= 48/*0*/ && code <= 57/*9*/) ||
(code >= 65/*A*/ && code <= 70/*Z*/) ||
(code >= 97/*a*/ && code <= 102/*z*/))) {
if (++encodeCheck === 3)
keyEncoded = true;
} else {
encodeCheck = 0;
}
}
}
}

if (code === 43/*+*/) {
if (eqIdx < eqLen) {
if (i - lastPos > 0)
key += qs.slice(lastPos, i);
key += '%20';
keyEncoded = true;
} else {
if (i - lastPos > 0)
value += qs.slice(lastPos, i);
value += '%20';
valEncoded = true;
}
lastPos = i + 1;
}
}

// Check if we have leftover key or value data
if (pairs > 0 && (lastPos < qs.length || eqIdx > 0)) {
if (lastPos < qs.length) {
if (eqIdx < eqLen)
key += qs.slice(lastPos);
else if (sepIdx < sepLen)
value += qs.slice(lastPos);
}
if (keyEncoded)
key = decodeStr(key, decode);
if (valEncoded)
value = decodeStr(value, decode);
// Use a key array lookup instead of using hasOwnProperty(), which is
// slower
if (keys.indexOf(key) === -1) {
obj[key] = value;
keys[keys.length] = key;
} else {
obj[k] = [obj[k], v];
const curValue = obj[key];
// `instanceof Array` is used instead of Array.isArray() because it
// is ~15-20% faster with v8 4.7 and is safe to use because we are
// using it with values being created within this function
if (curValue instanceof Array)
curValue[curValue.length] = value;
else
obj[key] = [curValue, value];
}
}

return obj;
};


function replacePlus(str) {
var ret = '';
var start = 0;
var i = -1;
while ((i = str.indexOf('+', i + 1)) !== -1) {
ret += str.slice(start, i);
ret += '%20';
start = i + 1;
}
if (start === 0)
return str;
if (start < str.length)
ret += str.slice(start);
return ret;
}


// v8 does not optimize functions with try-catch blocks, so we isolate them here
// to minimize the damage
function decodeStr(s, decoder) {
Expand Down
3 changes: 3 additions & 0 deletions test/parallel/test-querystring.js
Original file line number Diff line number Diff line change
Expand Up @@ -248,3 +248,6 @@ qs.unescape = function(str) {
};
assert.deepEqual(qs.parse('foo=bor'), {f__: 'b_r'});
qs.unescape = prevUnescape;

// test separator and "equals" parsing order
assert.deepEqual(qs.parse('foo&bar', '&', '&'), { foo: '', bar: '' });

0 comments on commit a2a69a2

Please sign in to comment.