From 59559b8e7b76f1646af7e6b7d3a1afb3f1db3260 Mon Sep 17 00:00:00 2001 From: Rich Trott Date: Mon, 17 Oct 2022 11:46:25 -0700 Subject: [PATCH] url: treat special characters in hostnames more strictly in url.parse() Throw if ^, |, and some other special characters are in the hostname, similar to WHATWG URL. Use punycode/toAscii when % appears in a hostname, like WHATWG URL. --- lib/internal/idna.js | 9 +--- lib/url.js | 42 +++++++++++-------- test/parallel/test-url-format.js | 6 --- test/parallel/test-url-parse-format.js | 2 +- test/parallel/test-url-parse-invalid-input.js | 15 +++++++ 5 files changed, 43 insertions(+), 31 deletions(-) diff --git a/lib/internal/idna.js b/lib/internal/idna.js index 8591226d104d3a..566f8590d8755c 100644 --- a/lib/internal/idna.js +++ b/lib/internal/idna.js @@ -1,9 +1,4 @@ 'use strict'; -if (internalBinding('config').hasIntl) { - const { toASCII, toUnicode } = internalBinding('icu'); - module.exports = { toASCII, toUnicode }; -} else { - const { domainToASCII, domainToUnicode } = require('internal/url'); - module.exports = { toASCII: domainToASCII, toUnicode: domainToUnicode }; -} +const { domainToASCII, domainToUnicode } = require('internal/url'); +module.exports = { toASCII: domainToASCII, toUnicode: domainToUnicode }; diff --git a/lib/url.js b/lib/url.js index 95b7f8afe29e77..61d18bdce690ce 100644 --- a/lib/url.js +++ b/lib/url.js @@ -130,7 +130,6 @@ const { CHAR_QUESTION_MARK, CHAR_DOUBLE_QUOTE, CHAR_SINGLE_QUOTE, - CHAR_PERCENT, CHAR_SEMICOLON, CHAR_BACKWARD_SLASH, CHAR_CIRCUMFLEX_ACCENT, @@ -314,6 +313,7 @@ Url.prototype.parse = function parse(url, parseQueryString, slashesDenoteHost) { let hostEnd = -1; let atSign = -1; let nonHost = -1; + let invalid = -1; for (let i = 0; i < rest.length; ++i) { switch (rest.charCodeAt(i)) { case CHAR_TAB: @@ -324,28 +324,29 @@ Url.prototype.parse = function parse(url, parseQueryString, slashesDenoteHost) { i -= 1; break; case CHAR_SPACE: - case CHAR_DOUBLE_QUOTE: - case CHAR_PERCENT: - case CHAR_SINGLE_QUOTE: - case CHAR_SEMICOLON: case CHAR_LEFT_ANGLE_BRACKET: case CHAR_RIGHT_ANGLE_BRACKET: - case CHAR_BACKWARD_SLASH: case CHAR_CIRCUMFLEX_ACCENT: + case CHAR_VERTICAL_LINE: + case CHAR_DOUBLE_QUOTE: + case CHAR_SINGLE_QUOTE: case CHAR_GRAVE_ACCENT: case CHAR_LEFT_CURLY_BRACKET: - case CHAR_VERTICAL_LINE: case CHAR_RIGHT_CURLY_BRACKET: - // Characters that are never ever allowed in a hostname from RFC 2396 - if (nonHost === -1) - nonHost = i; + // Characters that are never ever allowed in a hostname from RFC 2396 + if (invalid === -1) { + invalid = i; + } break; + case CHAR_SEMICOLON: + case CHAR_BACKWARD_SLASH: case CHAR_HASH: case CHAR_FORWARD_SLASH: case CHAR_QUESTION_MARK: // Find the first instance of any host-ending characters - if (nonHost === -1) + if (nonHost === -1) { nonHost = i; + } hostEnd = i; break; case CHAR_AT: @@ -353,6 +354,7 @@ Url.prototype.parse = function parse(url, parseQueryString, slashesDenoteHost) { // auth portion cannot go past, or the last @ char is the decider. atSign = i; nonHost = -1; + invalid = -1; break; } if (hostEnd !== -1) @@ -364,9 +366,15 @@ Url.prototype.parse = function parse(url, parseQueryString, slashesDenoteHost) { start = atSign + 1; } if (nonHost === -1) { + if (invalid !== -1) { + throw new ERR_INVALID_URL(url); + } this.host = rest.slice(start); rest = ''; } else { + if (invalid > -1 && invalid < nonHost) { + throw new ERR_INVALID_URL(url); + } this.host = rest.slice(start, nonHost); rest = rest.slice(nonHost); } @@ -509,13 +517,13 @@ Url.prototype.parse = function parse(url, parseQueryString, slashesDenoteHost) { function getHostname(self, rest, hostname, url) { for (let i = 0; i < hostname.length; ++i) { const code = hostname.charCodeAt(i); - const isValid = (code !== CHAR_FORWARD_SLASH && - code !== CHAR_BACKWARD_SLASH && - code !== CHAR_HASH && - code !== CHAR_QUESTION_MARK && - code !== CHAR_COLON); + const isHostEnd = (code === CHAR_FORWARD_SLASH || + code === CHAR_BACKWARD_SLASH || + code === CHAR_HASH || + code === CHAR_QUESTION_MARK || + code === CHAR_COLON); - if (!isValid) { + if (isHostEnd) { // If leftover starts with :, then it represents an invalid port. if (hostname.charCodeAt(i) === 58) { throw new ERR_INVALID_URL(url); diff --git a/test/parallel/test-url-format.js b/test/parallel/test-url-format.js index 883d060ac2a152..7294838524332a 100644 --- a/test/parallel/test-url-format.js +++ b/test/parallel/test-url-format.js @@ -67,12 +67,6 @@ const formatTests = { hash: '#frag=?bar/#frag', pathname: '/' }, - 'http://google.com" onload="alert(42)/': { - href: 'http://google.com/%22%20onload=%22alert(42)/', - protocol: 'http:', - host: 'google.com', - pathname: '/%22%20onload=%22alert(42)/' - }, 'http://a.com/a/b/c?s#h': { href: 'http://a.com/a/b/c?s#h', protocol: 'http', diff --git a/test/parallel/test-url-parse-format.js b/test/parallel/test-url-parse-format.js index 03e7b7ece5ef5c..8a5372f2f8b36f 100644 --- a/test/parallel/test-url-parse-format.js +++ b/test/parallel/test-url-parse-format.js @@ -1007,7 +1007,7 @@ for (const u in parseTests) { assert.deepStrictEqual( actual, expected, - `parsing ${u} and expected ${inspect(expected)} but got ${inspect(actual)}` + `parsing ${inspect(u)}, expected ${inspect(expected)}, got ${inspect(actual)}` ); assert.deepStrictEqual( spaced, diff --git a/test/parallel/test-url-parse-invalid-input.js b/test/parallel/test-url-parse-invalid-input.js index 7ab86380ad7384..36a03cb4fdf261 100644 --- a/test/parallel/test-url-parse-invalid-input.js +++ b/test/parallel/test-url-parse-invalid-input.js @@ -86,3 +86,18 @@ if (common.hasIntl) { `parsing ${badURL}`); }); } + +{ + const badChars = [ ' ', '>', '<', '^', '|' ]; + for (const badChar of badChars) { + const badURL = `http://fail${badChar}fail.com/`; + assert.throws(() => { url.parse(badURL); }, + (e) => e.code === 'ERR_INVALID_URL', + `parsing "${badURL}"`); + } +} + +assert.throws(() => { url.parse('http://google.com" onload="alert(42)/'); }, + (e) => e.code === 'ERR_INVALID_URL', + 'parsing \'http://google.com" onload="alert(42)/\'' +);