Skip to content

Commit

Permalink
Improves pattern to detect http / https / ftp urls
Browse files Browse the repository at this point in the history
  • Loading branch information
botic committed Mar 18, 2014
1 parent c857936 commit 3ece85c
Show file tree
Hide file tree
Showing 2 changed files with 120 additions and 8 deletions.
50 changes: 42 additions & 8 deletions modules/ringo/utils/strings.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,45 @@ var NUMPATTERN = /[^0-9]/;
// Matches any single character that is NOT an ASCII letter, a digit,
// a hyphen, an underscore, a dot or a space.
// NOTE(review): presumably used to detect characters that are not allowed
// in file names - confirm against the callers.
var FILEPATTERN = /[^a-zA-Z0-9-_\. ]/;
// Matches any single character that is NOT a hexadecimal digit
// (0-9, a-f, A-F).
var HEXPATTERN = /[^a-fA-F0-9]/;

// Email and URL RegExps contributed by Scott Gonzalez: http://projects.scottsplayground.com/email_address_validation/
// Email RegExp contributed by Scott Gonzalez (http://projects.scottsplayground.com/email_address_validation/)
// licensed under MIT license - http://www.opensource.org/licenses/mit-license.php
//
// Anchored (^...$), case-insensitive pattern for a complete e-mail address:
// a local part that is either dot-separated atoms or a double-quoted string,
// followed by "@" and a dot-separated domain with an optional trailing dot.
var EMAILPATTERN = /^((([a-z]|\d|[!#\$%&'\*\+\-\/=\?\^_`{\|}~]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])+(\.([a-z]|\d|[!#\$%&'\*\+\-\/=\?\^_`{\|}~]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])+)*)|((\x22)((((\x20|\x09)*(\x0d\x0a))?(\x20|\x09)+)?(([\x01-\x08\x0b\x0c\x0e-\x1f\x7f]|\x21|[\x23-\x5b]|[\x5d-\x7e]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(\\([\x01-\x09\x0b\x0c\x0d-\x7f]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF]))))*(((\x20|\x09)*(\x0d\x0a))?(\x20|\x09)+)?(\x22)))@((([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])*([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])))\.)+(([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])*([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])))\.?$/i;

// URL RegExp contributed by Diego Perini
// licensed under MIT license - https://gist.github.com/dperini/729294
// Copyright (c) 2010-2013 Diego Perini (http://www.iport.it)
//
// Compiled once at module load as a case-insensitive java.util.regex.Pattern
// (not a JavaScript RegExp), so it has to be applied through
// URLPATTERN.matcher(str).matches() rather than URLPATTERN.test(str).
// URLPATTERN is declared exactly once here; an earlier JS RegExp assignment
// to the same variable was a dead store and has been dropped.
var URLPATTERN = java.util.regex.Pattern.compile("^" +
    // protocol identifier: http, https or ftp
    "(?:(?:https?|ftp)://)" +
    // optional user:pass authentication
    "(?:\\S+(?::\\S*)?@)?" +
    "(?:" +
        // IP address exclusion
        // private & local networks: 10.x.x.x, 127.x.x.x,
        // 169.254.x.x, 192.168.x.x and 172.16.x.x - 172.31.x.x
        "(?!(?:10|127)(?:\\.\\d{1,3}){3})" +
        "(?!(?:169\\.254|192\\.168)(?:\\.\\d{1,3}){2})" +
        "(?!172\\.(?:1[6-9]|2\\d|3[0-1])(?:\\.\\d{1,3}){2})" +
        // IP address dotted notation octets
        // first octet 1-223: excludes 0.x.x.x and the reserved
        // space >= 224.0.0.0
        "(?:[1-9]\\d?|1\\d\\d|2[01]\\d|22[0-3])" +
        // second and third octet 0-255
        "(?:\\.(?:1?\\d{1,2}|2[0-4]\\d|25[0-5])){2}" +
        // last octet 1-254: excludes network & broadcast addresses
        // (first & last IP address of each class)
        "(?:\\.(?:[1-9]\\d?|1\\d\\d|2[0-4]\\d|25[0-4]))" +
    "|" +
        // host name
        "(?:(?:[a-z\\u00a1-\\uffff0-9]+-?)*[a-z\\u00a1-\\uffff0-9]+)" +
        // domain name
        "(?:\\.(?:[a-z\\u00a1-\\uffff0-9]+-?)*[a-z\\u00a1-\\uffff0-9]+)*" +
        // TLD identifier: at least two letters, no digits
        "(?:\\.(?:[a-z\\u00a1-\\uffff]{2,}))" +
    ")" +
    // optional port number of 2 to 5 digits
    "(?::\\d{2,5})?" +
    // optional resource path: "/" followed by any non-whitespace characters
    "(?:/[^\\s]*)?" +
    "$", java.util.regex.Pattern.CASE_INSENSITIVE);

// Copyright (c) 2014 Chris O'Hara cohara87@gmail.com
// licensed under MIT license - https://github.com/chriso/validator.js/blob/master/LICENSE
Expand Down Expand Up @@ -98,8 +132,7 @@ function isDateFormat(string) {
}

/**
* parse a timestamp into a date object. This is used when users
* want to set createtime explicitly when creating/editing stories.
* parse a timestamp into a date object.
* @param {String} string the string
* @param {String} format date format to be applied
* @param {Object} timezone Java TimeZone Object (optional)
Expand All @@ -114,14 +147,15 @@ function toDate(string, format, timezone) {
}

/**
 * Checks whether the given string is a valid URL.
 * Only HTTP, HTTPS and FTP are allowed protocols.
 * @param {String} string the string
 * @returns Boolean true if the whole string matches URLPATTERN
 */
function isUrl(string) {
    // uses java.util.regex.Pattern for performance reasons,
    // pure JS / Rhino RegExp will not stop in feasible time!
    // URLPATTERN is therefore a compiled Java Pattern, which offers no
    // RegExp-style test() method; matches() checks the entire input.
    return URLPATTERN.matcher(string).matches();
}

/**
Expand Down
78 changes: 78 additions & 0 deletions test/ringo/utils/strings_test.js
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,84 @@ exports.testToDate = function () {
exports.testIsUrl = function () {
assert.isTrue(strings.isUrl(URL));
assert.isFalse(strings.isUrl(FOO));

// URLs from http://mathiasbynens.be/demo/url-regex
assert.isTrue(strings.isUrl("http://✪df.ws"));
assert.isTrue(strings.isUrl("http://foo.com/blah_blah"));
assert.isTrue(strings.isUrl("http://foo.com/blah_blah/"));
assert.isTrue(strings.isUrl("http://foo.com/blah_blah_(wikipedia)"));
assert.isTrue(strings.isUrl("http://foo.com/blah_blah_(wikipedia)_(again)"));
assert.isTrue(strings.isUrl("http://www.example.com/wpstyle/?p=364"));
assert.isTrue(strings.isUrl("https://www.example.com/foo/?bar=baz&inga=42&quux"));
assert.isTrue(strings.isUrl("http://✪df.ws/123"));
assert.isTrue(strings.isUrl("http://userid:password@example.com:8080"));
assert.isTrue(strings.isUrl("http://userid:password@example.com:8080/"));
assert.isTrue(strings.isUrl("http://userid@example.com"));
assert.isTrue(strings.isUrl("http://userid@example.com/"));
assert.isTrue(strings.isUrl("http://userid@example.com:8080"));
assert.isTrue(strings.isUrl("http://userid@example.com:8080/"));
assert.isTrue(strings.isUrl("http://userid:password@example.com"));
assert.isTrue(strings.isUrl("http://userid:password@example.com/"));
assert.isTrue(strings.isUrl("http://142.42.1.1/"));
assert.isTrue(strings.isUrl("http://142.42.1.1:8080/"));
assert.isTrue(strings.isUrl("http://➡.ws/䨹"));
assert.isTrue(strings.isUrl("http://⌘.ws"));
assert.isTrue(strings.isUrl("http://⌘.ws/"));
assert.isTrue(strings.isUrl("http://foo.com/blah_(wikipedia)#cite-1"));
assert.isTrue(strings.isUrl("http://foo.com/blah_(wikipedia)_blah#cite-1"));
assert.isTrue(strings.isUrl("http://foo.com/unicode_(✪)_in_parens"));
assert.isTrue(strings.isUrl("http://foo.com/(something)?after=parens"));
assert.isTrue(strings.isUrl("http://☺.damowmow.com/"));
assert.isTrue(strings.isUrl("http://code.google.com/events/#&product=browser"));
assert.isTrue(strings.isUrl("http://j.mp"));
assert.isTrue(strings.isUrl("ftp://foo.bar/baz"));
assert.isTrue(strings.isUrl("http://foo.bar/?q=Test%20URL-encoded%20stuff"));
assert.isTrue(strings.isUrl("http://مثال.إختبار"));
assert.isTrue(strings.isUrl("http://例子.测试"));
assert.isTrue(strings.isUrl("http://उदाहरण.परीक्षा"));
assert.isTrue(strings.isUrl("http://-.~_!$&'()*+,;=:%40:80%2f::::::@example.com"));
assert.isTrue(strings.isUrl("http://1337.net"));
assert.isTrue(strings.isUrl("http://a.b-c.de"));
assert.isTrue(strings.isUrl("http://223.255.255.254"));
assert.isFalse(strings.isUrl("http://"));
assert.isFalse(strings.isUrl("http://."));
assert.isFalse(strings.isUrl("http://.."));
assert.isFalse(strings.isUrl("http://../"));
assert.isFalse(strings.isUrl("http://?"));
assert.isFalse(strings.isUrl("http://??"));
assert.isFalse(strings.isUrl("http://??/"));
assert.isFalse(strings.isUrl("http://#"));
assert.isFalse(strings.isUrl("http://##"));
assert.isFalse(strings.isUrl("http://##/"));
assert.isFalse(strings.isUrl("http://foo.bar?q=Spaces should be encoded"));
assert.isFalse(strings.isUrl("//"));
assert.isFalse(strings.isUrl("//a"));
assert.isFalse(strings.isUrl("///a"));
assert.isFalse(strings.isUrl("///"));
assert.isFalse(strings.isUrl("http:///a"));
assert.isFalse(strings.isUrl("foo.com"));
assert.isFalse(strings.isUrl("rdar://1234"));
assert.isFalse(strings.isUrl("h://test"));
assert.isFalse(strings.isUrl("http:// shouldfail.com"));
assert.isFalse(strings.isUrl(":// should fail"));
assert.isFalse(strings.isUrl("http://foo.bar/foo(bar)baz quux"));
assert.isFalse(strings.isUrl("ftps://foo.bar/"));
assert.isFalse(strings.isUrl("http://-error-.invalid/"));
assert.isFalse(strings.isUrl("http://a.b--c.de/"));
assert.isFalse(strings.isUrl("http://-a.b.co"));
assert.isFalse(strings.isUrl("http://a.b-.co"));
assert.isFalse(strings.isUrl("http://0.0.0.0"));
assert.isFalse(strings.isUrl("http://10.1.1.0"));
assert.isFalse(strings.isUrl("http://10.1.1.255"));
assert.isFalse(strings.isUrl("http://224.1.1.1"));
assert.isFalse(strings.isUrl("http://1.1.1.1.1"));
assert.isFalse(strings.isUrl("http://123.123.123"));
assert.isFalse(strings.isUrl("http://3628126748"));
assert.isFalse(strings.isUrl("http://.www.foo.bar/"));
assert.isFalse(strings.isUrl("http://www.foo.bar./"));
assert.isFalse(strings.isUrl("http://.www.foo.bar./"));
assert.isFalse(strings.isUrl("http://10.1.1.1"));
assert.isFalse(strings.isUrl("http://10.1.1.254"));
};

exports.testIsFileName = function () {
Expand Down

0 comments on commit 3ece85c

Please sign in to comment.