Skip to content

Commit

Permalink
url: spec-compliant URLSearchParams parser
Browse files Browse the repository at this point in the history
The entire `URLSearchParams` class is now fully spec-compliant.

PR-URL: #11858
Fixes: #10821
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Joyee Cheung <joyeec9h3@gmail.com>
Reviewed-By: Daijiro Wachi <daijiro.wachi@gmail.com>
  • Loading branch information
TimothyGu committed Mar 22, 2017
1 parent c98a802 commit c515a98
Show file tree
Hide file tree
Showing 4 changed files with 197 additions and 17 deletions.
2 changes: 1 addition & 1 deletion benchmark/url/legacy-vs-whatwg-url-searchparams-parse.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ const inputs = require('../fixtures/url-inputs.js').searchParams;
const bench = common.createBenchmark(main, {
type: Object.keys(inputs),
method: ['legacy', 'whatwg'],
n: [1e5]
n: [1e6]
});

function useLegacy(n, input) {
Expand Down
115 changes: 101 additions & 14 deletions lib/internal/url.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
'use strict';

const util = require('util');
const { hexTable, StorageObject } = require('internal/querystring');
const {
hexTable,
isHexTable,
StorageObject
} = require('internal/querystring');
const binding = process.binding('url');
const context = Symbol('context');
const cannotBeBase = Symbol('cannot-be-base');
Expand Down Expand Up @@ -585,23 +589,106 @@ function initSearchParams(url, init) {
url[searchParams] = [];
return;
}
url[searchParams] = getParamsFromObject(querystring.parse(init));
url[searchParams] = parseParams(init);
}

function getParamsFromObject(obj) {
const keys = Object.keys(obj);
const values = [];
for (var i = 0; i < keys.length; i++) {
const name = keys[i];
const value = obj[name];
if (Array.isArray(value)) {
for (const item of value)
values.push(name, item);
} else {
values.push(name, value);
// application/x-www-form-urlencoded parser
// Ref: https://url.spec.whatwg.org/#concept-urlencoded-parser
function parseParams(qs) {
const out = [];
var pairStart = 0;
var lastPos = 0;
var seenSep = false;
var buf = '';
var encoded = false;
var encodeCheck = 0;
var i;
for (i = 0; i < qs.length; ++i) {
const code = qs.charCodeAt(i);

// Try matching key/value pair separator
if (code === 38/*&*/) {
if (pairStart === i) {
// We saw an empty substring between pair separators
lastPos = pairStart = i + 1;
continue;
}

if (lastPos < i)
buf += qs.slice(lastPos, i);
if (encoded)
buf = querystring.unescape(buf);
out.push(buf);

// If `buf` is the key, add an empty value.
if (!seenSep)
out.push('');

seenSep = false;
buf = '';
encoded = false;
encodeCheck = 0;
lastPos = pairStart = i + 1;
continue;
}

// Try matching key/value separator (e.g. '=') if we haven't already
if (!seenSep && code === 61/*=*/) {
// Key/value separator match!
if (lastPos < i)
buf += qs.slice(lastPos, i);
if (encoded)
buf = querystring.unescape(buf);
out.push(buf);

seenSep = true;
buf = '';
encoded = false;
encodeCheck = 0;
lastPos = i + 1;
continue;
}

// Handle + and percent decoding.
if (code === 43/*+*/) {
if (lastPos < i)
buf += qs.slice(lastPos, i);
buf += ' ';
lastPos = i + 1;
} else if (!encoded) {
// Try to match an (valid) encoded byte (once) to minimize unnecessary
// calls to string decoding functions
if (code === 37/*%*/) {
encodeCheck = 1;
} else if (encodeCheck > 0) {
// eslint-disable-next-line no-extra-boolean-cast
if (!!isHexTable[code]) {
if (++encodeCheck === 3)
encoded = true;
} else {
encodeCheck = 0;
}
}
}
}
return values;

// Deal with any leftover key or value data

// There is a trailing &. No more processing is needed.
if (pairStart === i)
return out;

if (lastPos < i)
buf += qs.slice(lastPos, i);
if (encoded)
buf = querystring.unescape(buf);
out.push(buf);

// If `buf` is the key, add an empty value.
if (!seenSep)
out.push('');

return out;
}

// Adapted from querystring's implementation.
Expand Down
68 changes: 68 additions & 0 deletions test/fixtures/url-searchparams.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
module.exports = [
['', '', []],
[
'foo=918854443121279438895193',
'foo=918854443121279438895193',
[['foo', '918854443121279438895193']]
],
['foo=bar', 'foo=bar', [['foo', 'bar']]],
['foo=bar&foo=quux', 'foo=bar&foo=quux', [['foo', 'bar'], ['foo', 'quux']]],
['foo=1&bar=2', 'foo=1&bar=2', [['foo', '1'], ['bar', '2']]],
[
"my%20weird%20field=q1!2%22'w%245%267%2Fz8)%3F",
'my+weird+field=q1%212%22%27w%245%267%2Fz8%29%3F',
[['my weird field', 'q1!2"\'w$5&7/z8)?']]
],
['foo%3Dbaz=bar', 'foo%3Dbaz=bar', [['foo=baz', 'bar']]],
['foo=baz=bar', 'foo=baz%3Dbar', [['foo', 'baz=bar']]],
[
'str=foo&arr=1&somenull&arr=2&undef=&arr=3',
'str=foo&arr=1&somenull=&arr=2&undef=&arr=3',
[
['str', 'foo'],
['arr', '1'],
['somenull', ''],
['arr', '2'],
['undef', ''],
['arr', '3']
]
],
[' foo = bar ', '+foo+=+bar+', [[' foo ', ' bar ']]],
['foo=%zx', 'foo=%25zx', [['foo', '%zx']]],
['foo=%EF%BF%BD', 'foo=%EF%BF%BD', [['foo', '\ufffd']]],
// See: https://github.com/joyent/node/issues/3058
['foo&bar=baz', 'foo=&bar=baz', [['foo', ''], ['bar', 'baz']]],
['a=b&c&d=e', 'a=b&c=&d=e', [['a', 'b'], ['c', ''], ['d', 'e']]],
['a=b&c=&d=e', 'a=b&c=&d=e', [['a', 'b'], ['c', ''], ['d', 'e']]],
['a=b&=c&d=e', 'a=b&=c&d=e', [['a', 'b'], ['', 'c'], ['d', 'e']]],
['a=b&=&d=e', 'a=b&=&d=e', [['a', 'b'], ['', ''], ['d', 'e']]],
['&&foo=bar&&', 'foo=bar', [['foo', 'bar']]],
['&', '', []],
['&&&&', '', []],
['&=&', '=', [['', '']]],
['&=&=', '=&=', [['', ''], ['', '']]],
['=', '=', [['', '']]],
['+', '+=', [[' ', '']]],
['+=', '+=', [[' ', '']]],
['=+', '=+', [['', ' ']]],
['+=&', '+=', [[' ', '']]],
['a&&b', 'a=&b=', [['a', ''], ['b', '']]],
['a=a&&b=b', 'a=a&b=b', [['a', 'a'], ['b', 'b']]],
['&a', 'a=', [['a', '']]],
['&=', '=', [['', '']]],
['a&a&', 'a=&a=', [['a', ''], ['a', '']]],
['a&a&a&', 'a=&a=&a=', [['a', ''], ['a', ''], ['a', '']]],
['a&a&a&a&', 'a=&a=&a=&a=', [['a', ''], ['a', ''], ['a', ''], ['a', '']]],
['a=&a=value&a=', 'a=&a=value&a=', [['a', ''], ['a', 'value'], ['a', '']]],
['foo%20bar=baz%20quux', 'foo+bar=baz+quux', [['foo bar', 'baz quux']]],
['+foo=+bar', '+foo=+bar', [[' foo', ' bar']]],
[
// fake percent encoding
'foo=%©ar&baz=%A©uux&xyzzy=%©ud',
'foo=%25%C2%A9ar&baz=%25A%C2%A9uux&xyzzy=%25%C2%A9ud',
[['foo', '%©ar'], ['baz', '%A©uux'], ['xyzzy', '%©ud']]
],
// always preserve order of key-value pairs
['a=1&b=2&a=3', 'a=1&b=2&a=3', [['a', '1'], ['b', '2'], ['a', '3']]],
['?a', '%3Fa=', [['?a', '']]]
];
29 changes: 27 additions & 2 deletions test/parallel/test-whatwg-url-searchparams.js
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
'use strict';

require('../common');
const common = require('../common');
const assert = require('assert');
const URL = require('url').URL;
const path = require('path');
const { URL, URLSearchParams } = require('url');

// Tests below are not from WPT.
const serialized = 'a=a&a=1&a=true&a=undefined&a=null&a=%EF%BF%BD' +
Expand Down Expand Up @@ -77,3 +78,27 @@ assert.throws(() => sp.forEach(1),

m.search = '?a=a&b=b';
assert.strictEqual(sp.toString(), 'a=a&b=b');

const tests = require(path.join(common.fixturesDir, 'url-searchparams.js'));

for (const [input, expected, parsed] of tests) {
if (input[0] !== '?') {
const sp = new URLSearchParams(input);
assert.strictEqual(String(sp), expected);
assert.deepStrictEqual(Array.from(sp), parsed);

m.search = input;
assert.strictEqual(String(m.searchParams), expected);
assert.deepStrictEqual(Array.from(m.searchParams), parsed);
}

{
const sp = new URLSearchParams(`?${input}`);
assert.strictEqual(String(sp), expected);
assert.deepStrictEqual(Array.from(sp), parsed);

m.search = `?${input}`;
assert.strictEqual(String(m.searchParams), expected);
assert.deepStrictEqual(Array.from(m.searchParams), parsed);
}
}

0 comments on commit c515a98

Please sign in to comment.