Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Percent decode #1361

Merged
merged 13 commits into from
Aug 15, 2024
147 changes: 128 additions & 19 deletions packages/core-js/modules/web.url-search-params.constructor.js
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
'use strict';
// TODO: in core-js@4, move /modules/ dependencies to public entries for better optimization by tools like `preset-env`
require('../modules/es.array.iterator');
require('../modules/es.string.from-code-point');
var $ = require('../internals/export');
var globalThis = require('../internals/global-this');
var safeGetBuiltIn = require('../internals/safe-get-built-in');
var getBuiltIn = require('../internals/get-built-in');
var call = require('../internals/function-call');
var uncurryThis = require('../internals/function-uncurry-this');
var DESCRIPTORS = require('../internals/descriptors');
Expand All @@ -30,6 +32,7 @@ var createIterResultObject = require('../internals/create-iter-result-object');
var validateArgumentsLength = require('../internals/validate-arguments-length');
var wellKnownSymbol = require('../internals/well-known-symbol');
var arraySort = require('../internals/array-sort');
var padStart = require('../internals/string-pad').start;

var ITERATOR = wellKnownSymbol('iterator');
var URL_SEARCH_PARAMS = 'URLSearchParams';
Expand All @@ -43,9 +46,7 @@ var NativeRequest = safeGetBuiltIn('Request');
var Headers = safeGetBuiltIn('Headers');
var RequestPrototype = NativeRequest && NativeRequest.prototype;
var HeadersPrototype = Headers && Headers.prototype;
var RegExp = globalThis.RegExp;
var TypeError = globalThis.TypeError;
var decodeURIComponent = globalThis.decodeURIComponent;
var encodeURIComponent = globalThis.encodeURIComponent;
var charAt = uncurryThis(''.charAt);
var join = uncurryThis([].join);
Expand All @@ -57,31 +58,139 @@ var split = uncurryThis(''.split);
var stringSlice = uncurryThis(''.slice);

// matches every literal `+`; `deserialize` replaces each match with a space before percent-decoding
var plus = /\+/g;
// lazily-filled cache of the regular expressions built by `percentSequence`, indexed by `bytes - 1`
var sequences = Array(4);
// U+FFFD REPLACEMENT CHARACTER, appended to the result wherever a percent-encoded byte sequence is malformed
var FALLBACK_REPLACER = '\uFFFD';
// one or more hex digits, case-insensitive; `parseHexOctet` tests its two-character slice against it
var VALID_HEX = /^[0-9a-f]+$/i;

var percentSequence = function (bytes) {
return sequences[bytes - 1] || (sequences[bytes - 1] = RegExp('((?:%[\\da-f]{2}){' + bytes + '})', 'gi'));
// helpers captured once at module load
// `uncurryThis` wrappers are called with the receiver as the first argument, e.g. `indexOf(string, '0')`
var indexOf = uncurryThis(''.indexOf);
var numberToString = uncurryThis(1.0.toString);
var fromCharCode = String.fromCharCode;
// NOTE(review): presumably resolves `String.fromCodePoint` as provided by the
// `es.string.from-code-point` module required above - confirm against `get-built-in`
var fromCodePoint = getBuiltIn('String', 'fromCodePoint');
// local alias of the global `parseInt`
var $parseInt = parseInt;
slowcheetah marked this conversation as resolved.
Show resolved Hide resolved

// Reads the two characters of `string` starting at index `start` as one hexadecimal octet.
// Returns the octet value (0..255), or NaN when fewer than two characters are available
// or when either of them is not a hex digit.
var parseHexOctet = function (string, start) {
  var substr = stringSlice(string, start, start + 2);
  // `VALID_HEX` accepts any non-empty run of hex digits, so the length is checked explicitly:
  // a single trailing digit (e.g. the `a` in `%a`) is not a complete octet
  if (substr.length !== 2 || !VALID_HEX.test(substr)) return NaN;

  return $parseInt(substr, 16);
};

var percentDecode = function (sequence) {
try {
return decodeURIComponent(sequence);
} catch (error) {
return sequence;
// Counts the consecutive 1-bits at the most significant end of an 8-bit value.
// For a UTF-8 lead byte this is the sequence length marker: 0 -> single-byte (ASCII),
// 1 -> continuation byte, 2..4 -> length of a multi-byte sequence, more -> invalid.
// `octet` is expected to be an integer in 0..255; only its low 8 bits are inspected.
var getLeadingOnes = function (octet) {
  var count = 0;
  // walk a single-bit mask from bit 7 downwards and stop at the first zero bit
  for (var mask = 0x80; mask > 0 && (octet & mask) !== 0; mask >>= 1) {
    count++;
  }
  return count;
};
slowcheetah marked this conversation as resolved.
Show resolved Hide resolved
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Awesome -)


// Combines the octets of a single UTF-8 sequence (1 to 4 bytes, lead byte first) into a code point.
// Returns the code point, or `null` when the sequence does not denote a valid Unicode scalar value:
//   - the array length is not 1..4
//   - the value is encoded with more bytes than necessary (overlong form, e.g. C0 80 for U+0000)
//   - the value is a UTF-16 surrogate (U+D800..U+DFFF)
//   - the value is above U+10FFFF
// The caller is responsible for checking that the continuation bytes are in the 0x80..0xBF range.
var utf8Decode = function (octets) {
  var len = octets.length;
  var codePoint = null;
  // smallest code point that legitimately needs `len` bytes
  var minimum = 0;

  switch (len) {
    case 1:
      codePoint = octets[0];
      break;
    case 2:
      codePoint = (octets[0] & 0x1F) << 6 | (octets[1] & 0x3F);
      minimum = 0x80;
      break;
    case 3:
      codePoint = (octets[0] & 0x0F) << 12 | (octets[1] & 0x3F) << 6 | (octets[2] & 0x3F);
      minimum = 0x800;
      break;
    case 4:
      codePoint = (octets[0] & 0x07) << 18 | (octets[1] & 0x3F) << 12 | (octets[2] & 0x3F) << 6 | (octets[3] & 0x3F);
      minimum = 0x10000;
      break;
  }

  // unsupported sequence length
  if (codePoint === null) return null;
  // overlong encoding
  if (codePoint < minimum) return null;
  // surrogates are not encodable in UTF-8
  if (codePoint >= 0xD800 && codePoint <= 0xDFFF) return null;

  return codePoint > 0x10FFFF ? null : codePoint;
};

var deserialize = function (it) {
var result = replace(it, plus, ' ');
var bytes = 4;
try {
return decodeURIComponent(result);
} catch (error) {
while (bytes) {
result = replace(result, percentSequence(bytes--), percentDecode);
/* eslint-disable max-statements -- TODO */
// Percent-decodes `input`, interpreting the decoded bytes as UTF-8.
//   - `%XX` escapes forming a well-formed UTF-8 sequence become the corresponding character
//   - a `%` that does not start a complete two-hex-digit escape is kept literally
//   - each malformed UTF-8 sequence is replaced with U+FFFD; the escape that broke the
//     sequence is not consumed, so it is processed again on its own
// Characters other than `%` are copied to the result unchanged.
var decode = function (input) {
  var length = input.length;
  var result = '';
  var i = 0;

  while (i < length) {
    var decodedChar = charAt(input, i);

    if (decodedChar === '%') {
      // `%%` or an escape cut off by the end of input: keep this `%` as is
      // and let the following characters be handled on their own
      if (i + 3 > length || charAt(input, i + 1) === '%') {
        result += '%';
        i++;
        continue;
      }

      var octet = parseHexOctet(input, i + 1);

      // not followed by two hex digits: literal `%`
      if (isNaN(octet)) {
        result += decodedChar;
        i++;
        continue;
      }

      // `i` now points at the last hex digit of the escape
      i += 2;
      var byteSequenceLength = getLeadingOnes(octet);

      if (byteSequenceLength === 0) {
        // single-byte (ASCII) value
        decodedChar = fromCharCode(octet);
      } else {
        // a stray continuation byte or an impossible lead byte
        if (byteSequenceLength === 1 || byteSequenceLength > 4) {
          result += FALLBACK_REPLACER;
          i++;
          continue;
        }

        var octets = [octet];
        var sequenceIndex = 1;

        while (sequenceIndex < byteSequenceLength) {
          // step onto the character that should start the next escape
          i++;
          if (i + 3 > length || charAt(input, i) !== '%') break;

          var nextByte = parseHexOctet(input, i + 1);

          // must be a continuation byte (0x80..0xBF); NaN fails this test as well.
          // `i` stays on the `%`, so the offending escape is re-read by the outer loop
          if (!(nextByte >= 128 && nextByte <= 191)) break;

          octets.push(nextByte);
          i += 2;
          sequenceIndex++;
        }

        // sequence ended early: one replacement character for the incomplete part,
        // then resume at `i` without skipping anything
        if (octets.length !== byteSequenceLength) {
          result += FALLBACK_REPLACER;
          continue;
        }

        var codePoint = utf8Decode(octets);
        // assign instead of appending, otherwise the stale `%` would be appended below as well
        decodedChar = codePoint === null ? FALLBACK_REPLACER : fromCodePoint(codePoint);
      }
    }

    result += decodedChar;
    i++;
  }

  return result;
};

// Turns one raw query-string component into its decoded form:
// every `+` stands for a space, after which the text is percent-decoded.
var deserialize = function (it) {
  return decode(replace(it, plus, ' '));
};
slowcheetah marked this conversation as resolved.
Show resolved Hide resolved

var find = /[!'()~]|%20/g;
Expand Down
29 changes: 29 additions & 0 deletions tests/unit-global/web.url-search-params.js
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,35 @@ QUnit.test('URLSearchParams', assert => {
params = new URLSearchParams(params.toString());
assert.same(params.get('query'), '+15555555555', 'parse encoded +');

params = new URLSearchParams('b=%2sf%2a');
assert.same(params.get('b'), '%2sf*', 'parse encoded %2sf%2a');
params = new URLSearchParams('b=%%2a');
assert.same(params.get('b'), '%*', 'parse encoded b=%%2a');

params = new URLSearchParams('a=b\u2384');
assert.same(params.get('a'), 'b\u2384', 'parse \u2384');
params = new URLSearchParams('a\u2384b=c');
assert.same(params.get('a\u2384b'), 'c', 'parse \u2384');

params = new URLSearchParams('a=b%e2%8e%84');
assert.same(params.get('a'), 'b\u2384', 'parse b%e2%8e%84');
params = new URLSearchParams('a%e2%8e%84b=c');
assert.same(params.get('a\u2384b'), 'c', 'parse b%e2%8e%84');

params = new URLSearchParams('a=b\uD83D\uDCA9c');
assert.same(params.get('a'), 'b\uD83D\uDCA9c', 'parse \uD83D\uDCA9');
params = new URLSearchParams('a\uD83D\uDCA9b=c');
assert.same(params.get('a\uD83D\uDCA9b'), 'c', 'parse \uD83D\uDCA9');

params = new URLSearchParams('a=b%f0%9f%92%a9c');
assert.same(params.get('a'), 'b\uD83D\uDCA9c', 'parse %f0%9f%92%a9');
params = new URLSearchParams('a%f0%9f%92%a9b=c');
assert.same(params.get('a\uD83D\uDCA9b'), 'c', 'parse %f0%9f%92%a9');

assert.same(String(new URLSearchParams('%C2')), '%EF%BF%BD=');
assert.same(String(new URLSearchParams('%F0%9F%D0%90')), '%EF%BF%BD%D0%90=');
assert.same(String(new URLSearchParams('%25')), '%25=');

const testData = [
{ input: '?a=%', output: [['a', '%']], name: 'handling %' },
{ input: { '+': '%C2' }, output: [['+', '%C2']], name: 'object with +' },
Expand Down
29 changes: 29 additions & 0 deletions tests/unit-pure/web.url-search-params.js
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,35 @@ QUnit.test('URLSearchParams', assert => {
params = new URLSearchParams(params.toString());
assert.same(params.get('query'), '+15555555555', 'parse encoded +');

params = new URLSearchParams('b=%2sf%2a');
assert.same(params.get('b'), '%2sf*', 'parse encoded %2sf%2a');
params = new URLSearchParams('b=%%2a');
assert.same(params.get('b'), '%*', 'parse encoded b=%%2a');

params = new URLSearchParams('a=b\u2384');
assert.same(params.get('a'), 'b\u2384', 'parse \u2384');
params = new URLSearchParams('a\u2384b=c');
assert.same(params.get('a\u2384b'), 'c', 'parse \u2384');

params = new URLSearchParams('a=b%e2%8e%84');
assert.same(params.get('a'), 'b\u2384', 'parse b%e2%8e%84');
params = new URLSearchParams('a%e2%8e%84b=c');
assert.same(params.get('a\u2384b'), 'c', 'parse b%e2%8e%84');

params = new URLSearchParams('a=b\uD83D\uDCA9c');
assert.same(params.get('a'), 'b\uD83D\uDCA9c', 'parse \uD83D\uDCA9');
params = new URLSearchParams('a\uD83D\uDCA9b=c');
assert.same(params.get('a\uD83D\uDCA9b'), 'c', 'parse \uD83D\uDCA9');

params = new URLSearchParams('a=b%f0%9f%92%a9c');
assert.same(params.get('a'), 'b\uD83D\uDCA9c', 'parse %f0%9f%92%a9');
params = new URLSearchParams('a%f0%9f%92%a9b=c');
assert.same(params.get('a\uD83D\uDCA9b'), 'c', 'parse %f0%9f%92%a9');

assert.same(String(new URLSearchParams('%C2')), '%EF%BF%BD=');
assert.same(String(new URLSearchParams('%F0%9F%D0%90')), '%EF%BF%BD%D0%90=');
assert.same(String(new URLSearchParams('%25')), '%25=');

const testData = [
{ input: '?a=%', output: [['a', '%']], name: 'handling %' },
{ input: { '+': '%C2' }, output: [['+', '%C2']], name: 'object with +' },
Expand Down