diff --git a/lib/url.js b/lib/url.js
index 8280ddf056c967e..328452ba3cb27a7 100644
--- a/lib/url.js
+++ b/lib/url.js
@@ -177,7 +177,9 @@ function isIpv6Hostname(hostname) {
 // as IPv6 by isIpv6Hostname above
 //
 // [1]: https://url.spec.whatwg.org/#forbidden-host-code-point
-const forbiddenHostChars = /[\t\n\r #%/:<>?@[\\\]^|]/;
+const forbiddenHostChars = /[\0\t\n\r #%/:<>?@[\\\]^|]/;
+// For IPv6 hostnames, permit '[', ']', and ':' (part of the literal syntax).
+const forbiddenHostCharsIpv6 = /[\0\t\n\r #%/<>?@\\^|]/;
 
 Url.prototype.parse = function parse(url, parseQueryString, slashesDenoteHost) {
   validateString(url, 'url');
@@ -400,27 +402,33 @@ Url.prototype.parse = function parse(url, parseQueryString, slashesDenoteHost) {
       this.hostname = this.hostname.toLowerCase();
     }
 
-    if (!ipv6Hostname && this.hostname !== '') {
-      // IDNA Support: Returns a punycoded representation of "domain".
-      // It only converts parts of the domain name that
-      // have non-ASCII characters, i.e. it doesn't matter if
-      // you call it with a domain that already is ASCII-only.
-
-      // Use lenient mode (`true`) to try to support even non-compliant
-      // URLs.
-      this.hostname = toASCII(this.hostname, true);
-
-      // Prevent two potential routes of hostname spoofing.
-      // 1. If this.hostname is empty, it must have become empty due to toASCII
-      //    since we checked this.hostname above.
-      // 2. If any of forbiddenHostChars appears in this.hostname, it must have
-      //    also gotten in due to toASCII. This is since getHostname would have
-      //    filtered them out otherwise.
-      // Rather than trying to correct this by moving the non-host part into
-      // the pathname as we've done in getHostname, throw an exception to
-      // convey the severity of this issue.
-      if (this.hostname === '' || forbiddenHostChars.test(this.hostname)) {
-        throw new ERR_INVALID_URL(url);
+    if (this.hostname !== '') {
+      if (ipv6Hostname) {
+        if (forbiddenHostCharsIpv6.test(this.hostname)) {
+          throw new ERR_INVALID_URL(url);
+        }
+      } else {
+        // IDNA Support: Returns a punycoded representation of "domain".
+        // It only converts parts of the domain name that
+        // have non-ASCII characters, i.e. it doesn't matter if
+        // you call it with a domain that already is ASCII-only.
+
+        // Use lenient mode (`true`) to try to support even non-compliant
+        // URLs.
+        this.hostname = toASCII(this.hostname, true);
+
+        // Prevent two potential routes of hostname spoofing.
+        // 1. If this.hostname is empty, it must have become empty due to toASCII
+        //    since we checked this.hostname above.
+        // 2. If any of forbiddenHostChars appears in this.hostname, it must have
+        //    also gotten in due to toASCII. This is since getHostname would have
+        //    filtered them out otherwise.
+        // Rather than trying to correct this by moving the non-host part into
+        // the pathname as we've done in getHostname, throw an exception to
+        // convey the severity of this issue.
+        if (this.hostname === '' || forbiddenHostChars.test(this.hostname)) {
+          throw new ERR_INVALID_URL(url);
+        }
       }
     }
 
diff --git a/test/parallel/test-url-parse-invalid-input.js b/test/parallel/test-url-parse-invalid-input.js
index 45d6ff943073812..75ef800d23927b1 100644
--- a/test/parallel/test-url-parse-invalid-input.js
+++ b/test/parallel/test-url-parse-invalid-input.js
@@ -37,6 +37,10 @@ assert.throws(() => { url.parse('http://%E0%A4%A@fail'); },
                 return e.code === undefined;
               });
 
+assert.throws(() => { url.parse('http://[127.0.0.1\x00c8763]:8000/'); },
+              { code: 'ERR_INVALID_URL', input: 'http://[127.0.0.1\x00c8763]:8000/' }
+);
+
 if (common.hasIntl) {
   // An array of Unicode code points whose Unicode NFKD contains a "bad
   // character".