From 61204720361824881aefd64f5bccda7d7be6617a Mon Sep 17 00:00:00 2001 From: Jonathan Johnson Date: Wed, 26 Nov 2014 20:02:25 -0600 Subject: [PATCH] url: change hostname regex to negate invalid chars Regarding joyent/node#8520 This changes hostname validation from a whitelist regex approach to a blacklist regex approach as described in https://url.spec.whatwg.org/#host-parsing. url.parse misinterpreted `https://good.com+.evil.org/` as `https://good.com/+.evil.org/`. If we use url.parse to check the validity of the hostname, the test passes, but in the browser the user is redirected to the evil.org website. --- lib/url.js | 5 +++-- test/simple/test-url.js | 36 ++++++++++++++++++++++++------------ 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/lib/url.js b/lib/url.js index f5e7ec0a9f7b8e..0302fda1427181 100644 --- a/lib/url.js +++ b/lib/url.js @@ -70,8 +70,9 @@ var protocolPattern = /^([a-z0-9.+-]+:)/i, nonHostChars = ['%', '/', '?', ';', '#'].concat(autoEscape), hostEndingChars = ['/', '?', '#'], hostnameMaxLen = 255, - hostnamePartPattern = /^[a-z0-9A-Z_-]{0,63}$/, - hostnamePartStart = /^([a-z0-9A-Z_-]{0,63})(.*)$/, + hostnamePatternString = '[^' + nonHostChars.join('') + ']{0,63}', + hostnamePartPattern = new RegExp('^' + hostnamePatternString + '$'), + hostnamePartStart = new RegExp('^(' + hostnamePatternString + ')(.*)$'), // protocols that can allow "unsafe" and "unwise" chars. unsafeProtocol = { 'javascript': true, diff --git a/test/simple/test-url.js b/test/simple/test-url.js index df72cc6f4e65e1..f12a00dbed0b72 100644 --- a/test/simple/test-url.js +++ b/test/simple/test-url.js @@ -177,32 +177,44 @@ var parseTests = { 'path': '/Y' }, + // + not an invalid host character + // per https://url.spec.whatwg.org/#host-parsing + 'http://x.y.com+a/b/c' : { + 'href': 'http://x.y.com+a/b/c', + 'protocol': 'http:', + 'slashes': true, + 'host': 'x.y.com+a', + 'hostname': 'x.y.com+a', + 'pathname': '/b/c', + 'path': '/b/c' + }, + // an unexpected invalid char in the hostname. - 'HtTp://x.y.cOm*a/b/c?d=e#f gi' : { - 'href': 'http://x.y.com/*a/b/c?d=e#f%20g%3Ch%3Ei', + 'HtTp://x.y.cOm;a/b/c?d=e#f gi' : { + 'href': 'http://x.y.com/;a/b/c?d=e#f%20g%3Ch%3Ei', 'protocol': 'http:', 'slashes': true, 'host': 'x.y.com', 'hostname': 'x.y.com', - 'pathname': '/*a/b/c', + 'pathname': ';a/b/c', 'search': '?d=e', 'query': 'd=e', 'hash': '#f%20g%3Ch%3Ei', - 'path': '/*a/b/c?d=e' + 'path': ';a/b/c?d=e' }, // make sure that we don't accidentally lcast the path parts. - 'HtTp://x.y.cOm*A/b/c?d=e#f gi' : { - 'href': 'http://x.y.com/*A/b/c?d=e#f%20g%3Ch%3Ei', + 'HtTp://x.y.cOm;A/b/c?d=e#f gi' : { + 'href': 'http://x.y.com/;A/b/c?d=e#f%20g%3Ch%3Ei', 'protocol': 'http:', 'slashes': true, 'host': 'x.y.com', 'hostname': 'x.y.com', - 'pathname': '/*A/b/c', + 'pathname': ';A/b/c', 'search': '?d=e', 'query': 'd=e', 'hash': '#f%20g%3Ch%3Ei', - 'path': '/*A/b/c?d=e' + 'path': ';A/b/c?d=e' }, 'http://x...y...#p': { @@ -517,17 +529,17 @@ var parseTests = { 'path': '/' }, - 'http://www.Äffchen.cOm*A/b/c?d=e#f gi' : { - 'href': 'http://www.xn--ffchen-9ta.com/*A/b/c?d=e#f%20g%3Ch%3Ei', + 'http://www.Äffchen.cOm;A/b/c?d=e#f gi' : { + 'href': 'http://www.xn--ffchen-9ta.com/;A/b/c?d=e#f%20g%3Ch%3Ei', 'protocol': 'http:', 'slashes': true, 'host': 'www.xn--ffchen-9ta.com', 'hostname': 'www.xn--ffchen-9ta.com', - 'pathname': '/*A/b/c', + 'pathname': ';A/b/c', 'search': '?d=e', 'query': 'd=e', 'hash': '#f%20g%3Ch%3Ei', - 'path': '/*A/b/c?d=e' + 'path': ';A/b/c?d=e' }, 'http://SÉLIER.COM/' : {