From 61204720361824881aefd64f5bccda7d7be6617a Mon Sep 17 00:00:00 2001
From: Jonathan Johnson <me@jondavidjohn.com>
Date: Wed, 26 Nov 2014 20:02:25 -0600
Subject: [PATCH] url: change hostname regex to negate invalid chars

Regarding joyent/node#8520

This changes hostname validation from a whitelist regex approach
to a blacklist regex approach as described in https://url.spec.whatwg.org/#host-parsing.

url.parse misinterpreted `https://good.com+.evil.org/`
as `https://good.com/+.evil.org/`.  If we use url.parse to check the
validity of the hostname, the test passes, but in the browser the
user is redirected to the evil.org website.
---
 lib/url.js              |  5 +++--
 test/simple/test-url.js | 36 ++++++++++++++++++++++++------------
 2 files changed, 27 insertions(+), 14 deletions(-)
diff --git a/lib/url.js b/lib/url.js
index f5e7ec0a9f7b8e..0302fda1427181 100644
--- a/lib/url.js
+++ b/lib/url.js
@@ -70,8 +70,9 @@ var protocolPattern = /^([a-z0-9.+-]+:)/i,
     nonHostChars = ['%', '/', '?', ';', '#'].concat(autoEscape),
     hostEndingChars = ['/', '?', '#'],
     hostnameMaxLen = 255,
-    hostnamePartPattern = /^[a-z0-9A-Z_-]{0,63}$/,
-    hostnamePartStart = /^([a-z0-9A-Z_-]{0,63})(.*)$/,
+    hostnamePatternString = '[^' + nonHostChars.join('') + ']{0,63}',
+    hostnamePartPattern = new RegExp('^' + hostnamePatternString + '$'),
+    hostnamePartStart = new RegExp('^(' + hostnamePatternString + ')(.*)$'),
     // protocols that can allow "unsafe" and "unwise" chars.
     unsafeProtocol = {
       'javascript': true,
diff --git a/test/simple/test-url.js b/test/simple/test-url.js
index df72cc6f4e65e1..f12a00dbed0b72 100644
--- a/test/simple/test-url.js
+++ b/test/simple/test-url.js
@@ -177,32 +177,44 @@ var parseTests = {
     'path': '/Y'
   },
 
+  // + not an invalid host character
+  // per https://url.spec.whatwg.org/#host-parsing
+  'http://x.y.com+a/b/c' : {
+    'href': 'http://x.y.com+a/b/c',
+    'protocol': 'http:',
+    'slashes': true,
+    'host': 'x.y.com+a',
+    'hostname': 'x.y.com+a',
+    'pathname': '/b/c',
+    'path': '/b/c'
+  },
+
   // an unexpected invalid char in the hostname.
-  'HtTp://x.y.cOm*a/b/c?d=e#f g<h>i' : {
-    'href': 'http://x.y.com/*a/b/c?d=e#f%20g%3Ch%3Ei',
+  'HtTp://x.y.cOm;a/b/c?d=e#f g<h>i' : {
+    'href': 'http://x.y.com/;a/b/c?d=e#f%20g%3Ch%3Ei',
     'protocol': 'http:',
     'slashes': true,
     'host': 'x.y.com',
     'hostname': 'x.y.com',
-    'pathname': '/*a/b/c',
+    'pathname': ';a/b/c',
     'search': '?d=e',
     'query': 'd=e',
     'hash': '#f%20g%3Ch%3Ei',
-    'path': '/*a/b/c?d=e'
+    'path': ';a/b/c?d=e'
   },
 
   // make sure that we don't accidentally lcast the path parts.
-  'HtTp://x.y.cOm*A/b/c?d=e#f g<h>i' : {
-    'href': 'http://x.y.com/*A/b/c?d=e#f%20g%3Ch%3Ei',
+  'HtTp://x.y.cOm;A/b/c?d=e#f g<h>i' : {
+    'href': 'http://x.y.com/;A/b/c?d=e#f%20g%3Ch%3Ei',
     'protocol': 'http:',
     'slashes': true,
     'host': 'x.y.com',
     'hostname': 'x.y.com',
-    'pathname': '/*A/b/c',
+    'pathname': ';A/b/c',
     'search': '?d=e',
     'query': 'd=e',
     'hash': '#f%20g%3Ch%3Ei',
-    'path': '/*A/b/c?d=e'
+    'path': ';A/b/c?d=e'
   },
 
   'http://x...y...#p': {
@@ -517,17 +529,17 @@ var parseTests = {
     'path': '/'
   },
 
-  'http://www.Äffchen.cOm*A/b/c?d=e#f g<h>i' : {
-    'href': 'http://www.xn--ffchen-9ta.com/*A/b/c?d=e#f%20g%3Ch%3Ei',
+  'http://www.Äffchen.cOm;A/b/c?d=e#f g<h>i' : {
+    'href': 'http://www.xn--ffchen-9ta.com/;A/b/c?d=e#f%20g%3Ch%3Ei',
     'protocol': 'http:',
     'slashes': true,
     'host': 'www.xn--ffchen-9ta.com',
     'hostname': 'www.xn--ffchen-9ta.com',
-    'pathname': '/*A/b/c',
+    'pathname': ';A/b/c',
     'search': '?d=e',
     'query': 'd=e',
     'hash': '#f%20g%3Ch%3Ei',
-    'path': '/*A/b/c?d=e'
+    'path': ';A/b/c?d=e'
   },
 
   'http://SÉLIER.COM/' : {