Skip to content

Commit

Permalink
url: change hostname regex to negate invalid chars
Browse files Browse the repository at this point in the history
Regarding nodejs/node-v0.x-archive#8520

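This changes hostname validation from a whitelist regex (only an explicit set
of characters is accepted) to a blacklist regex (every character is accepted
except the known invalid host characters), as described in https://url.spec.whatwg.org/#host-parsing.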

Previously, url.parse misinterpreted `https://good.com+.evil.org/`
as `https://good.com/+.evil.org/`, i.e. it reported `good.com` as the
hostname. Code that relied on url.parse to validate the hostname would
therefore accept the URL, while a browser given the same URL would send
the user to the evil.org website.
  • Loading branch information
jondavidjohn authored and trevnorris committed Dec 3, 2014
1 parent c4f6c22 commit 6120472
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 14 deletions.
5 changes: 3 additions & 2 deletions lib/url.js
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,9 @@ var protocolPattern = /^([a-z0-9.+-]+:)/i,
nonHostChars = ['%', '/', '?', ';', '#'].concat(autoEscape),
hostEndingChars = ['/', '?', '#'],
hostnameMaxLen = 255,
hostnamePartPattern = /^[a-z0-9A-Z_-]{0,63}$/,
hostnamePartStart = /^([a-z0-9A-Z_-]{0,63})(.*)$/,
hostnamePatternString = '[^' + nonHostChars.join('') + ']{0,63}',
hostnamePartPattern = new RegExp('^' + hostnamePatternString + '$'),
hostnamePartStart = new RegExp('^(' + hostnamePatternString + ')(.*)$'),
// protocols that can allow "unsafe" and "unwise" chars.
unsafeProtocol = {
'javascript': true,
Expand Down
36 changes: 24 additions & 12 deletions test/simple/test-url.js
Original file line number Diff line number Diff line change
Expand Up @@ -177,32 +177,44 @@ var parseTests = {
'path': '/Y'
},

// '+' is a valid host character
// per https://url.spec.whatwg.org/#host-parsing
'http://x.y.com+a/b/c' : {
'href': 'http://x.y.com+a/b/c',
'protocol': 'http:',
'slashes': true,
'host': 'x.y.com+a',
'hostname': 'x.y.com+a',
'pathname': '/b/c',
'path': '/b/c'
},

// an unexpected invalid char in the hostname.
'HtTp://x.y.cOm*a/b/c?d=e#f g<h>i' : {
'href': 'http://x.y.com/*a/b/c?d=e#f%20g%3Ch%3Ei',
'HtTp://x.y.cOm;a/b/c?d=e#f g<h>i' : {
'href': 'http://x.y.com/;a/b/c?d=e#f%20g%3Ch%3Ei',
'protocol': 'http:',
'slashes': true,
'host': 'x.y.com',
'hostname': 'x.y.com',
'pathname': '/*a/b/c',
'pathname': ';a/b/c',
'search': '?d=e',
'query': 'd=e',
'hash': '#f%20g%3Ch%3Ei',
'path': '/*a/b/c?d=e'
'path': ';a/b/c?d=e'
},

// make sure that we don't accidentally lcast the path parts.
'HtTp://x.y.cOm*A/b/c?d=e#f g<h>i' : {
'href': 'http://x.y.com/*A/b/c?d=e#f%20g%3Ch%3Ei',
'HtTp://x.y.cOm;A/b/c?d=e#f g<h>i' : {
'href': 'http://x.y.com/;A/b/c?d=e#f%20g%3Ch%3Ei',
'protocol': 'http:',
'slashes': true,
'host': 'x.y.com',
'hostname': 'x.y.com',
'pathname': '/*A/b/c',
'pathname': ';A/b/c',
'search': '?d=e',
'query': 'd=e',
'hash': '#f%20g%3Ch%3Ei',
'path': '/*A/b/c?d=e'
'path': ';A/b/c?d=e'
},

'http://x...y...#p': {
Expand Down Expand Up @@ -517,17 +529,17 @@ var parseTests = {
'path': '/'
},

'http://www.Äffchen.cOm*A/b/c?d=e#f g<h>i' : {
'href': 'http://www.xn--ffchen-9ta.com/*A/b/c?d=e#f%20g%3Ch%3Ei',
'http://www.Äffchen.cOm;A/b/c?d=e#f g<h>i' : {
'href': 'http://www.xn--ffchen-9ta.com/;A/b/c?d=e#f%20g%3Ch%3Ei',
'protocol': 'http:',
'slashes': true,
'host': 'www.xn--ffchen-9ta.com',
'hostname': 'www.xn--ffchen-9ta.com',
'pathname': '/*A/b/c',
'pathname': ';A/b/c',
'search': '?d=e',
'query': 'd=e',
'hash': '#f%20g%3Ch%3Ei',
'path': '/*A/b/c?d=e'
'path': ';A/b/c?d=e'
},

'http://SÉLIER.COM/' : {
Expand Down

0 comments on commit 6120472

Please sign in to comment.