From 8dbd562590047b35b97b1064b6f1726057979093 Mon Sep 17 00:00:00 2001 From: Timothy Gu Date: Fri, 3 Feb 2017 22:06:04 -0800 Subject: [PATCH] url: fix surrogate handling in encodeAuth() Also factor out common parts in querystring and url. Backport-of: https://github.com/nodejs/node/pull/11161 --- lib/internal/querystring.js | 15 +++++++ lib/internal/url.js | 72 +------------------------------ lib/querystring.js | 15 ++----- lib/url.js | 84 +++++++++++++++++++++++++++++++++---- node.gyp | 1 + test/parallel/test-url.js | 9 ++++ 6 files changed, 104 insertions(+), 92 deletions(-) create mode 100644 lib/internal/querystring.js diff --git a/lib/internal/querystring.js b/lib/internal/querystring.js new file mode 100644 index 00000000000000..2f8d77d3e9d2e7 --- /dev/null +++ b/lib/internal/querystring.js @@ -0,0 +1,15 @@ +'use strict'; + +const hexTable = new Array(256); +for (var i = 0; i < 256; ++i) + hexTable[i] = '%' + ((i < 16 ? '0' : '') + i.toString(16)).toUpperCase(); + +// Instantiating this is faster than explicitly calling `Object.create(null)` +// to get a "clean" empty object (tested with v8 v4.9). +function StorageObject() {} +StorageObject.prototype = Object.create(null); + +module.exports = { + hexTable, + StorageObject +}; diff --git a/lib/internal/url.js b/lib/internal/url.js index 01f8ed52cb9c4e..c799ed839b84d8 100644 --- a/lib/internal/url.js +++ b/lib/internal/url.js @@ -1,6 +1,7 @@ 'use strict'; const util = require('util'); +const { StorageObject } = require('internal/querystring'); const binding = process.binding('url'); const context = Symbol('context'); const cannotBeBase = Symbol('cannot-be-base'); @@ -22,9 +23,6 @@ const IteratorPrototype = Object.getPrototypeOf( Object.getPrototypeOf([][Symbol.iterator]()) ); -function StorageObject() {} -StorageObject.prototype = Object.create(null); - class OpaqueOrigin { toString() { return 'null'; @@ -528,73 +526,6 @@ Object.defineProperties(URL.prototype, { } }); -const hexTable = new Array(256); - -for (var i = 0; i < 256; ++i) - hexTable[i] = '%' + ((i < 16 ? '0' : '') + i.toString(16)).toUpperCase(); -function encodeAuth(str) { - // faster encodeURIComponent alternative for encoding auth uri components - var out = ''; - var lastPos = 0; - for (var i = 0; i < str.length; ++i) { - var c = str.charCodeAt(i); - - // These characters do not need escaping: - // ! - . _ ~ - // ' ( ) * : - // digits - // alpha (uppercase) - // alpha (lowercase) - if (c === 0x21 || c === 0x2D || c === 0x2E || c === 0x5F || c === 0x7E || - (c >= 0x27 && c <= 0x2A) || - (c >= 0x30 && c <= 0x3A) || - (c >= 0x41 && c <= 0x5A) || - (c >= 0x61 && c <= 0x7A)) { - continue; - } - - if (i - lastPos > 0) - out += str.slice(lastPos, i); - - lastPos = i + 1; - - // Other ASCII characters - if (c < 0x80) { - out += hexTable[c]; - continue; - } - - // Multi-byte characters ... - if (c < 0x800) { - out += hexTable[0xC0 | (c >> 6)] + hexTable[0x80 | (c & 0x3F)]; - continue; - } - if (c < 0xD800 || c >= 0xE000) { - out += hexTable[0xE0 | (c >> 12)] + - hexTable[0x80 | ((c >> 6) & 0x3F)] + - hexTable[0x80 | (c & 0x3F)]; - continue; - } - // Surrogate pair - ++i; - var c2; - if (i < str.length) - c2 = str.charCodeAt(i) & 0x3FF; - else - c2 = 0; - c = 0x10000 + (((c & 0x3FF) << 10) | c2); - out += hexTable[0xF0 | (c >> 18)] + - hexTable[0x80 | ((c >> 12) & 0x3F)] + - hexTable[0x80 | ((c >> 6) & 0x3F)] + - hexTable[0x80 | (c & 0x3F)]; - } - if (lastPos === 0) - return str; - if (lastPos < str.length) - return out + str.slice(lastPos); - return out; -} - function update(url, params) { if (!url) return; @@ -1212,7 +1143,6 @@ exports.URL = URL; exports.URLSearchParams = URLSearchParams; exports.domainToASCII = domainToASCII; exports.domainToUnicode = domainToUnicode; -exports.encodeAuth = encodeAuth; exports.urlToOptions = urlToOptions; exports.formatSymbol = kFormat; exports.searchParamsSymbol = searchParams; diff --git a/lib/querystring.js b/lib/querystring.js index 5ccb5fa77b320f..8360857ba4bfce 100644 --- a/lib/querystring.js +++ b/lib/querystring.js @@ -1,5 +1,7 @@ 'use strict'; +const { Buffer } = require('buffer'); +const { StorageObject, hexTable } = require('internal/querystring'); const QueryString = module.exports = { unescapeBuffer, // `unescape()` is a JS global, so we need to use a different local name @@ -14,13 +16,6 @@ const QueryString = module.exports = { parse, decode: parse }; -const Buffer = require('buffer').Buffer; - -// This constructor is used to store parsed query string values. Instantiating -// this is faster than explicitly calling `Object.create(null)` to get a -// "clean" empty object (tested with v8 v4.9). -function ParsedQueryString() {} -ParsedQueryString.prototype = Object.create(null); const unhexTable = [ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0 - 15 @@ -116,10 +111,6 @@ function qsUnescape(s, decodeSpaces) { } -const hexTable = []; -for (var i = 0; i < 256; ++i) - hexTable[i] = '%' + ((i < 16 ? '0' : '') + i.toString(16)).toUpperCase(); - // These characters do not need escaping when generating query strings: // ! - . _ ~ // ' ( ) * @@ -263,7 +254,7 @@ const defEqCodes = [61]; // = // Parse a key/val string. function parse(qs, sep, eq, options) { - const obj = new ParsedQueryString(); + const obj = new StorageObject(); if (typeof qs !== 'string' || qs.length === 0) { return obj; diff --git a/lib/url.js b/lib/url.js index 9164d5991b7a67..845f22a44447d6 100644 --- a/lib/url.js +++ b/lib/url.js @@ -10,8 +10,8 @@ function importPunycode() { const { toASCII } = importPunycode(); +const { StorageObject, hexTable } = require('internal/querystring'); const internalUrl = require('internal/url'); -const encodeAuth = internalUrl.encodeAuth; exports.parse = urlParse; exports.resolve = urlResolve; exports.resolveObject = urlResolveObject; @@ -76,12 +76,6 @@ const slashedProtocol = { }; const querystring = require('querystring'); -// This constructor is used to store parsed query string values. Instantiating -// this is faster than explicitly calling `Object.create(null)` to get a -// "clean" empty object (tested with v8 v4.9). -function ParsedQueryString() {} -ParsedQueryString.prototype = Object.create(null); - function urlParse(url, parseQueryString, slashesDenoteHost) { if (url instanceof Url) return url; @@ -190,7 +184,7 @@ Url.prototype.parse = function(url, parseQueryString, slashesDenoteHost) { } } else if (parseQueryString) { this.search = ''; - this.query = new ParsedQueryString(); + this.query = new StorageObject(); } return this; } @@ -380,7 +374,7 @@ Url.prototype.parse = function(url, parseQueryString, slashesDenoteHost) { } else if (parseQueryString) { // no query string, but parseQueryString still requested this.search = ''; - this.query = new ParsedQueryString(); + this.query = new StorageObject(); } var firstIdx = (questionIdx !== -1 && @@ -959,3 +953,75 @@ function spliceOne(list, index) { list[i] = list[k]; list.pop(); } + +// These characters do not need escaping: +// ! - . _ ~ +// ' ( ) * : +// digits +// alpha (uppercase) +// alpha (lowercase) +const noEscapeAuth = [ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x00 - 0x0F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10 - 0x1F + 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, // 0x20 - 0x2F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, // 0x30 - 0x3F + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40 - 0x4F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, // 0x50 - 0x5F + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60 - 0x6F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 // 0x70 - 0x7F +]; + +function encodeAuth(str) { + // faster encodeURIComponent alternative for encoding auth uri components + var out = ''; + var lastPos = 0; + for (var i = 0; i < str.length; ++i) { + var c = str.charCodeAt(i); + + // ASCII + if (c < 0x80) { + if (noEscapeAuth[c] === 1) + continue; + if (lastPos < i) + out += str.slice(lastPos, i); + lastPos = i + 1; + out += hexTable[c]; + continue; + } + + if (lastPos < i) + out += str.slice(lastPos, i); + + // Multi-byte characters ... + if (c < 0x800) { + lastPos = i + 1; + out += hexTable[0xC0 | (c >> 6)] + hexTable[0x80 | (c & 0x3F)]; + continue; + } + if (c < 0xD800 || c >= 0xE000) { + lastPos = i + 1; + out += hexTable[0xE0 | (c >> 12)] + + hexTable[0x80 | ((c >> 6) & 0x3F)] + + hexTable[0x80 | (c & 0x3F)]; + continue; + } + // Surrogate pair + ++i; + var c2; + if (i < str.length) + c2 = str.charCodeAt(i) & 0x3FF; + else + c2 = 0; + lastPos = i + 1; + c = 0x10000 + (((c & 0x3FF) << 10) | c2); + out += hexTable[0xF0 | (c >> 18)] + + hexTable[0x80 | ((c >> 12) & 0x3F)] + + hexTable[0x80 | ((c >> 6) & 0x3F)] + + hexTable[0x80 | (c & 0x3F)]; + } + if (lastPos === 0) + return str; + if (lastPos < str.length) + return out + str.slice(lastPos); + return out; +} diff --git a/node.gyp b/node.gyp index 69b26d340d6666..d14e7ff8f13d62 100644 --- a/node.gyp +++ b/node.gyp @@ -92,6 +92,7 @@ 'lib/internal/process/stdio.js', 'lib/internal/process/warning.js', 'lib/internal/process.js', + 'lib/internal/querystring.js', 'lib/internal/readline.js', 'lib/internal/repl.js', 'lib/internal/socket_list.js', diff --git a/test/parallel/test-url.js b/test/parallel/test-url.js index fa43e1fd5f1330..6e42b5e78041dd 100644 --- a/test/parallel/test-url.js +++ b/test/parallel/test-url.js @@ -1231,6 +1231,15 @@ const formatTests = { protocol: 'file', pathname: '/home/user', path: '/home/user' + }, + + // surrogate in auth + 'http://%F0%9F%98%80@www.example.com/': { + href: 'http://%F0%9F%98%80@www.example.com/', + protocol: 'http:', + auth: '\uD83D\uDE00', + hostname: 'www.example.com', + pathname: '/' } }; for (const u in formatTests) {