From ff785fd51792a95281014b5e9b526c6619450dc9 Mon Sep 17 00:00:00 2001 From: Brian White Date: Wed, 8 Feb 2017 03:23:03 -0500 Subject: [PATCH] querystring: fix empty pairs and optimize parse() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit fixes handling of empty pairs that occur before the end of the query string so that they are also ignored. Additionally, some optimizations have been made, including: * Avoid unnecessary code execution where possible * Use a lookup table when checking for hex characters * Avoid forced decoding when '+' characters are encountered and we are using the default decoder Fixes: https://github.com/nodejs/node/issues/10454 PR-URL: https://github.com/nodejs/node/pull/11234 Reviewed-By: James M Snell Reviewed-By: Nicu Micleușanu --- benchmark/querystring/querystring-parse.js | 40 ++-- lib/querystring.js | 246 +++++++++++---------- test/parallel/test-querystring.js | 9 + 3 files changed, 166 insertions(+), 129 deletions(-) diff --git a/benchmark/querystring/querystring-parse.js b/benchmark/querystring/querystring-parse.js index fe14d95a53f0a0..0ed677751280c0 100644 --- a/benchmark/querystring/querystring-parse.js +++ b/benchmark/querystring/querystring-parse.js @@ -12,7 +12,9 @@ var inputs = { multivalue: 'foo=bar&foo=baz&foo=quux&quuy=quuz', multivaluemany: 'foo=bar&foo=baz&foo=quux&quuy=quuz&foo=abc&foo=def&' + 'foo=ghi&foo=jkl&foo=mno&foo=pqr&foo=stu&foo=vwxyz', - manypairs: 'a&b&c&d&e&f&g&h&i&j&k&l&m&n&o&p&q&r&s&t&u&v&w&x&y&z' + manypairs: 'a&b&c&d&e&f&g&h&i&j&k&l&m&n&o&p&q&r&s&t&u&v&w&x&y&z', + manyblankpairs: '&&&&&&&&&&&&&&&&&&&&&&&&', + altspaces: 'foo+bar=baz+quux&xyzzy+thud=quuy+quuz&abc=def+ghi' }; var bench = common.createBenchmark(main, { @@ -20,34 +22,38 @@ var bench = common.createBenchmark(main, { n: [1e6], }); +// A deopt followed by a reopt of main() can happen right when the timed loop +// starts, which seems to have a noticeable effect on the benchmark results. +// So we explicitly disable optimization of main() to avoid this potential +// issue. +v8.setFlagsFromString('--allow_natives_syntax'); +eval('%NeverOptimizeFunction(main)'); + function main(conf) { var type = conf.type; var n = conf.n | 0; var input = inputs[type]; + var i; - // Force-optimize querystring.parse() so that the benchmark doesn't get - // disrupted by the optimizer kicking in halfway through. - v8.setFlagsFromString('--allow_natives_syntax'); - if (type !== 'multicharsep') { - querystring.parse(input); - eval('%OptimizeFunctionOnNextCall(querystring.parse)'); - querystring.parse(input); - } else { - querystring.parse(input, '&&&&&&&&&&'); - eval('%OptimizeFunctionOnNextCall(querystring.parse)'); - querystring.parse(input, '&&&&&&&&&&'); - } + // Note: we do *not* use OptimizeFunctionOnNextCall() here because currently + // it causes a deopt followed by a reopt, which could make its way into the + // timed loop. Instead, just execute the function a "sufficient" number of + // times before the timed loop to ensure the function is optimized just once. + if (type === 'multicharsep') { + for (i = 0; i < n; i += 1) + querystring.parse(input, '&&&&&&&&&&'); - var i; - if (type !== 'multicharsep') { bench.start(); for (i = 0; i < n; i += 1) - querystring.parse(input); + querystring.parse(input, '&&&&&&&&&&'); bench.end(n); } else { + for (i = 0; i < n; i += 1) + querystring.parse(input); + bench.start(); for (i = 0; i < n; i += 1) - querystring.parse(input, '&&&&&&&&&&'); + querystring.parse(input); bench.end(n); } } diff --git a/lib/querystring.js b/lib/querystring.js index 9b4ca27640aa71..32ba30712090a4 100644 --- a/lib/querystring.js +++ b/lib/querystring.js @@ -250,6 +250,25 @@ function stringify(obj, sep, eq, options) { return ''; } +const isHexTable = [ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0 - 15 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16 - 31 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 32 - 47 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, // 48 - 63 + 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 64 - 79 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80 - 95 + 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 96 - 111 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 112 - 127 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 128 ... + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 // ... 256 +]; + function charCodes(str) { if (str.length === 0) return []; if (str.length === 1) return [str.charCodeAt(0)]; @@ -292,7 +311,6 @@ function parse(qs, sep, eq, options) { const customDecode = (decode !== qsUnescape); const keys = []; - var posIdx = 0; var lastPos = 0; var sepIdx = 0; var eqIdx = 0; @@ -300,6 +318,7 @@ function parse(qs, sep, eq, options) { var value = ''; var keyEncoded = customDecode; var valEncoded = customDecode; + const plusChar = (customDecode ? '%20' : ' '); var encodeCheck = 0; for (var i = 0; i < qs.length; ++i) { const code = qs.charCodeAt(i); @@ -310,142 +329,145 @@ function parse(qs, sep, eq, options) { // Key/value pair separator match! const end = i - sepIdx + 1; if (eqIdx < eqLen) { - // If we didn't find the key/value separator, treat the substring as - // part of the key instead of the value - if (lastPos < end) + // We didn't find the (entire) key/value separator + if (lastPos < end) { + // Treat the substring as part of the key instead of the value key += qs.slice(lastPos, end); - } else if (lastPos < end) - value += qs.slice(lastPos, end); - if (keyEncoded) - key = decodeStr(key, decode); - if (valEncoded) - value = decodeStr(value, decode); - - if (key || value || lastPos - posIdx > sepLen || i === 0) { - // Use a key array lookup instead of using hasOwnProperty(), which is - // slower - if (keys.indexOf(key) === -1) { - obj[key] = value; - keys[keys.length] = key; + if (keyEncoded) + key = decodeStr(key, decode); } else { - const curValue = obj[key] || ''; - // A simple Array-specific property check is enough here to - // distinguish from a string value and is faster and still safe - // since we are generating all of the values being assigned. - if (curValue.pop) - curValue[curValue.length] = value; - else if (curValue) - obj[key] = [curValue, value]; + // We saw an empty substring between separators + if (--pairs === 0) + return obj; + lastPos = i + 1; + sepIdx = eqIdx = 0; + continue; + } + } else { + if (lastPos < end) { + value += qs.slice(lastPos, end); + if (valEncoded) + value = decodeStr(value, decode); } - } else if (i === 1) { - // A pair with repeated sep could be added into obj in the first loop - // and it should be deleted - delete obj[key]; + if (keyEncoded) + key = decodeStr(key, decode); + } + + // Use a key array lookup instead of using hasOwnProperty(), which is + // slower + if (keys.indexOf(key) === -1) { + obj[key] = value; + keys[keys.length] = key; + } else { + const curValue = obj[key]; + // A simple Array-specific property check is enough here to + // distinguish from a string value and is faster and still safe + // since we are generating all of the values being assigned. + if (curValue.pop) + curValue[curValue.length] = value; + else + obj[key] = [curValue, value]; } if (--pairs === 0) - break; + return obj; keyEncoded = valEncoded = customDecode; - encodeCheck = 0; key = value = ''; - posIdx = lastPos; + encodeCheck = 0; lastPos = i + 1; sepIdx = eqIdx = 0; } - continue; } else { sepIdx = 0; - if (!valEncoded) { - // Try to match an (valid) encoded byte (once) to minimize unnecessary - // calls to string decoding functions - if (code === 37/*%*/) { - encodeCheck = 1; - } else if (encodeCheck > 0 && - ((code >= 48/*0*/ && code <= 57/*9*/) || - (code >= 65/*A*/ && code <= 70/*F*/) || - (code >= 97/*a*/ && code <= 102/*f*/))) { - if (++encodeCheck === 3) - valEncoded = true; + // Try matching key/value separator (e.g. '=') if we haven't already + if (eqIdx < eqLen) { + if (code === eqCodes[eqIdx]) { + if (++eqIdx === eqLen) { + // Key/value separator match! + const end = i - eqIdx + 1; + if (lastPos < end) + key += qs.slice(lastPos, end); + encodeCheck = 0; + lastPos = i + 1; + } + continue; } else { - encodeCheck = 0; + eqIdx = 0; + if (!keyEncoded) { + // Try to match an (valid) encoded byte once to minimize unnecessary + // calls to string decoding functions + if (code === 37/*%*/) { + encodeCheck = 1; + continue; + } else if (encodeCheck > 0) { + // eslint-disable-next-line no-extra-boolean-cast + if (!!isHexTable[code]) { + if (++encodeCheck === 3) + keyEncoded = true; + continue; + } else { + encodeCheck = 0; + } + } + } } - } - } - - // Try matching key/value separator (e.g. '=') if we haven't already - if (eqIdx < eqLen) { - if (code === eqCodes[eqIdx]) { - if (++eqIdx === eqLen) { - // Key/value separator match! - const end = i - eqIdx + 1; - if (lastPos < end) - key += qs.slice(lastPos, end); - encodeCheck = 0; + if (code === 43/*+*/) { + if (lastPos < i) + key += qs.slice(lastPos, i); + key += plusChar; lastPos = i + 1; + continue; } - continue; - } else { - eqIdx = 0; - if (!keyEncoded) { - // Try to match an (valid) encoded byte once to minimize unnecessary - // calls to string decoding functions - if (code === 37/*%*/) { - encodeCheck = 1; - } else if (encodeCheck > 0 && - ((code >= 48/*0*/ && code <= 57/*9*/) || - (code >= 65/*A*/ && code <= 70/*F*/) || - (code >= 97/*a*/ && code <= 102/*f*/))) { + } + if (code === 43/*+*/) { + if (lastPos < i) + value += qs.slice(lastPos, i); + value += plusChar; + lastPos = i + 1; + } else if (!valEncoded) { + // Try to match an (valid) encoded byte (once) to minimize unnecessary + // calls to string decoding functions + if (code === 37/*%*/) { + encodeCheck = 1; + } else if (encodeCheck > 0) { + // eslint-disable-next-line no-extra-boolean-cast + if (!!isHexTable[code]) { if (++encodeCheck === 3) - keyEncoded = true; + valEncoded = true; } else { encodeCheck = 0; } } } } - - if (code === 43/*+*/) { - if (eqIdx < eqLen) { - if (lastPos < i) - key += qs.slice(lastPos, i); - key += '%20'; - keyEncoded = true; - } else { - if (lastPos < i) - value += qs.slice(lastPos, i); - value += '%20'; - valEncoded = true; - } - lastPos = i + 1; - } } - // Check if we have leftover key or value data - if (pairs !== 0 && (lastPos < qs.length || eqIdx > 0)) { - if (lastPos < qs.length) { - if (eqIdx < eqLen) - key += qs.slice(lastPos); - else if (sepIdx < sepLen) - value += qs.slice(lastPos); - } - if (keyEncoded) - key = decodeStr(key, decode); - if (valEncoded) - value = decodeStr(value, decode); - // Use a key array lookup instead of using hasOwnProperty(), which is - // slower - if (keys.indexOf(key) === -1) { - obj[key] = value; - keys[keys.length] = key; - } else { - const curValue = obj[key]; - // A simple Array-specific property check is enough here to - // distinguish from a string value and is faster and still safe since - // we are generating all of the values being assigned. - if (curValue.pop) - curValue[curValue.length] = value; - else - obj[key] = [curValue, value]; - } + // Deal with any leftover key or value data + if (lastPos < qs.length) { + if (eqIdx < eqLen) + key += qs.slice(lastPos); + else if (sepIdx < sepLen) + value += qs.slice(lastPos); + } else if (eqIdx === 0) { + // We ended on an empty substring + return obj; + } + if (keyEncoded) + key = decodeStr(key, decode); + if (valEncoded) + value = decodeStr(value, decode); + // Use a key array lookup instead of using hasOwnProperty(), which is slower + if (keys.indexOf(key) === -1) { + obj[key] = value; + keys[keys.length] = key; + } else { + const curValue = obj[key]; + // A simple Array-specific property check is enough here to + // distinguish from a string value and is faster and still safe since + // we are generating all of the values being assigned. + if (curValue.pop) + curValue[curValue.length] = value; + else + obj[key] = [curValue, value]; } return obj; diff --git a/test/parallel/test-querystring.js b/test/parallel/test-querystring.js index baa426094c77c6..04034377fea76d 100644 --- a/test/parallel/test-querystring.js +++ b/test/parallel/test-querystring.js @@ -59,6 +59,15 @@ const qsTestCases = [ ['&&&&', '', {}], ['&=&', '=', { '': '' }], ['&=&=', '=&=', { '': [ '', '' ]}], + ['a&&b', 'a=&b=', { 'a': '', 'b': '' }], + ['a=a&&b=b', 'a=a&b=b', { 'a': 'a', 'b': 'b' }], + ['&a', 'a=', { 'a': '' }], + ['&=', '=', { '': '' }], + ['a&a&', 'a=&a=', { a: [ '', '' ] }], + ['a&a&a&', 'a=&a=&a=', { a: [ '', '', '' ] }], + ['a&a&a&a&', 'a=&a=&a=&a=', { a: [ '', '', '', '' ] }], + ['a=&a=value&a=', 'a=&a=value&a=', { a: [ '', 'value', '' ] }], + ['foo+bar=baz+quux', 'foo%20bar=baz%20quux', { 'foo bar': 'baz quux' }], [null, '', {}], [undefined, '', {}] ];