From 3ece85c7741be49bab65a8a649a0561d0c328425 Mon Sep 17 00:00:00 2001 From: Philipp Naderer Date: Tue, 18 Mar 2014 13:59:56 +0100 Subject: [PATCH] Improves pattern to detect http / https / ftp urls --- modules/ringo/utils/strings.js | 50 ++++++++++++++++---- test/ringo/utils/strings_test.js | 78 ++++++++++++++++++++++++++++++++ 2 files changed, 120 insertions(+), 8 deletions(-) diff --git a/modules/ringo/utils/strings.js b/modules/ringo/utils/strings.js index 622b605f1..abc611c3b 100644 --- a/modules/ringo/utils/strings.js +++ b/modules/ringo/utils/strings.js @@ -20,11 +20,45 @@ var NUMPATTERN = /[^0-9]/; var FILEPATTERN = /[^a-zA-Z0-9-_\. ]/; var HEXPATTERN = /[^a-fA-F0-9]/; -// Email and URL RegExps contributed by Scott Gonzalez: http://projects.scottsplayground.com/email_address_validation/ +// Email RegExp contributed by Scott Gonzalez (http://projects.scottsplayground.com/email_address_validation/) // licensed unter MIT license - http://www.opensource.org/licenses/mit-license.php var EMAILPATTERN = /^((([a-z]|\d|[!#\$%&'\*\+\-\/=\?\^_`{\|}~]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])+(\.([a-z]|\d|[!#\$%&'\*\+\-\/=\?\^_`{\|}~]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])+)*)|((\x22)((((\x20|\x09)*(\x0d\x0a))?(\x20|\x09)+)?(([\x01-\x08\x0b\x0c\x0e-\x1f\x7f]|\x21|[\x23-\x5b]|[\x5d-\x7e]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(\\([\x01-\x09\x0b\x0c\x0d-\x7f]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF]))))*(((\x20|\x09)*(\x0d\x0a))?(\x20|\x09)+)?(\x22)))@((([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])*([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])))\.)+(([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])*([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])))\.?$/i; -var URLPATTERN = 
/^(https?|ftp):\/\/(((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:)*@)?(((\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5]))|((([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])*([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])))\.)+(([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])*([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])))\.?)(:\d*)?)(\/((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|@)+(\/(([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|@)*)*)?)?(\?((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|@)|[\uE000-\uF8FF]|\/|\?)*)?(\#((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|@)|\/|\?)*)?$/i; +// URL RegExp contributed by Diego Perini +// licensed unter MIT license - https://gist.github.com/dperini/729294 +// Copyright (c) 2010-2013 Diego Perini (http://www.iport.it) +var URLPATTERN = java.util.regex.Pattern.compile("^" + + // protocol identifier + "(?:(?:https?|ftp)://)" + + // user:pass authentication + "(?:\\S+(?::\\S*)?@)?" 
+ + "(?:" + + // IP address exclusion + // private & local networks + "(?!(?:10|127)(?:\\.\\d{1,3}){3})" + + "(?!(?:169\\.254|192\\.168)(?:\\.\\d{1,3}){2})" + + "(?!172\\.(?:1[6-9]|2\\d|3[0-1])(?:\\.\\d{1,3}){2})" + + // IP address dotted notation octets + // excludes loopback network 0.0.0.0 + // excludes reserved space >= 224.0.0.0 + // excludes network & broadcast addresses + // (first & last IP address of each class) + "(?:[1-9]\\d?|1\\d\\d|2[01]\\d|22[0-3])" + + "(?:\\.(?:1?\\d{1,2}|2[0-4]\\d|25[0-5])){2}" + + "(?:\\.(?:[1-9]\\d?|1\\d\\d|2[0-4]\\d|25[0-4]))" + + "|" + + // host name + "(?:(?:[a-z\\u00a1-\\uffff0-9]+-?)*[a-z\\u00a1-\\uffff0-9]+)" + + // domain name + "(?:\\.(?:[a-z\\u00a1-\\uffff0-9]+-?)*[a-z\\u00a1-\\uffff0-9]+)*" + + // TLD identifier + "(?:\\.(?:[a-z\\u00a1-\\uffff]{2,}))" + + ")" + + // port number + "(?::\\d{2,5})?" + + // resource path + "(?:/[^\\s]*)?" + +"$", java.util.regex.Pattern.CASE_INSENSITIVE); // Copyright (c) 2014 Chris O'Hara cohara87@gmail.com // licensed unter MIT license - https://github.com/chriso/validator.js/blob/master/LICENSE @@ -98,8 +132,7 @@ function isDateFormat(string) { } /** - * parse a timestamp into a date object. This is used when users - * want to set createtime explicitly when creating/editing stories. + * parse a timestamp into a date object. * @param {String} string the string * @param {String} format date format to be applied * @param {Object} timezone Java TimeZone Object (optional) @@ -114,14 +147,15 @@ function toDate(string, format, timezone) { } /** - * function checks if the string passed contains any characters that - * are forbidden in URLs and tries to create a java.net.URL from it - * FIXME: probably deprecated -> ringo.Url + * function checks if the string is a valid URL. + * Only HTTP, HTTPS and FTP are allowed protocols. 
* @param {String} string the string * @returns Boolean */ function isUrl(string) { - return URLPATTERN.test(string); + // uses java.util.regex.Pattern for performance reasons, + // pure JS / Rhino RegExp will not stop in feasible time! + return (URLPATTERN.matcher(string)).matches(); } /** diff --git a/test/ringo/utils/strings_test.js b/test/ringo/utils/strings_test.js index cead81431..a87fd0778 100644 --- a/test/ringo/utils/strings_test.js +++ b/test/ringo/utils/strings_test.js @@ -36,6 +36,84 @@ exports.testToDate = function () { exports.testIsUrl = function () { assert.isTrue(strings.isUrl(URL)); assert.isFalse(strings.isUrl(FOO)); + + // URLs from http://mathiasbynens.be/demo/url-regex + assert.isTrue(strings.isUrl("http://✪df.ws")); + assert.isTrue(strings.isUrl("http://foo.com/blah_blah")); + assert.isTrue(strings.isUrl("http://foo.com/blah_blah/")); + assert.isTrue(strings.isUrl("http://foo.com/blah_blah_(wikipedia)")); + assert.isTrue(strings.isUrl("http://foo.com/blah_blah_(wikipedia)_(again)")); + assert.isTrue(strings.isUrl("http://www.example.com/wpstyle/?p=364")); + assert.isTrue(strings.isUrl("https://www.example.com/foo/?bar=baz&inga=42&quux")); + assert.isTrue(strings.isUrl("http://✪df.ws/123")); + assert.isTrue(strings.isUrl("http://userid:password@example.com:8080")); + assert.isTrue(strings.isUrl("http://userid:password@example.com:8080/")); + assert.isTrue(strings.isUrl("http://userid@example.com")); + assert.isTrue(strings.isUrl("http://userid@example.com/")); + assert.isTrue(strings.isUrl("http://userid@example.com:8080")); + assert.isTrue(strings.isUrl("http://userid@example.com:8080/")); + assert.isTrue(strings.isUrl("http://userid:password@example.com")); + assert.isTrue(strings.isUrl("http://userid:password@example.com/")); + assert.isTrue(strings.isUrl("http://142.42.1.1/")); + assert.isTrue(strings.isUrl("http://142.42.1.1:8080/")); + assert.isTrue(strings.isUrl("http://➡.ws/䨹")); + assert.isTrue(strings.isUrl("http://⌘.ws")); + 
assert.isTrue(strings.isUrl("http://⌘.ws/")); + assert.isTrue(strings.isUrl("http://foo.com/blah_(wikipedia)#cite-1")); + assert.isTrue(strings.isUrl("http://foo.com/blah_(wikipedia)_blah#cite-1")); + assert.isTrue(strings.isUrl("http://foo.com/unicode_(✪)_in_parens")); + assert.isTrue(strings.isUrl("http://foo.com/(something)?after=parens")); + assert.isTrue(strings.isUrl("http://☺.damowmow.com/")); + assert.isTrue(strings.isUrl("http://code.google.com/events/#&product=browser")); + assert.isTrue(strings.isUrl("http://j.mp")); + assert.isTrue(strings.isUrl("ftp://foo.bar/baz")); + assert.isTrue(strings.isUrl("http://foo.bar/?q=Test%20URL-encoded%20stuff")); + assert.isTrue(strings.isUrl("http://مثال.إختبار")); + assert.isTrue(strings.isUrl("http://例子.测试")); + assert.isTrue(strings.isUrl("http://उदाहरण.परीक्षा")); + assert.isTrue(strings.isUrl("http://-.~_!$&'()*+,;=:%40:80%2f::::::@example.com")); + assert.isTrue(strings.isUrl("http://1337.net")); + assert.isTrue(strings.isUrl("http://a.b-c.de")); + assert.isTrue(strings.isUrl("http://223.255.255.254")); + assert.isFalse(strings.isUrl("http://")); + assert.isFalse(strings.isUrl("http://.")); + assert.isFalse(strings.isUrl("http://..")); + assert.isFalse(strings.isUrl("http://../")); + assert.isFalse(strings.isUrl("http://?")); + assert.isFalse(strings.isUrl("http://??")); + assert.isFalse(strings.isUrl("http://??/")); + assert.isFalse(strings.isUrl("http://#")); + assert.isFalse(strings.isUrl("http://##")); + assert.isFalse(strings.isUrl("http://##/")); + assert.isFalse(strings.isUrl("http://foo.bar?q=Spaces should be encoded")); + assert.isFalse(strings.isUrl("//")); + assert.isFalse(strings.isUrl("//a")); + assert.isFalse(strings.isUrl("///a")); + assert.isFalse(strings.isUrl("///")); + assert.isFalse(strings.isUrl("http:///a")); + assert.isFalse(strings.isUrl("foo.com")); + assert.isFalse(strings.isUrl("rdar://1234")); + assert.isFalse(strings.isUrl("h://test")); + assert.isFalse(strings.isUrl("http:// 
shouldfail.com")); + assert.isFalse(strings.isUrl(":// should fail")); + assert.isFalse(strings.isUrl("http://foo.bar/foo(bar)baz quux")); + assert.isFalse(strings.isUrl("ftps://foo.bar/")); + assert.isFalse(strings.isUrl("http://-error-.invalid/")); + assert.isFalse(strings.isUrl("http://a.b--c.de/")); + assert.isFalse(strings.isUrl("http://-a.b.co")); + assert.isFalse(strings.isUrl("http://a.b-.co")); + assert.isFalse(strings.isUrl("http://0.0.0.0")); + assert.isFalse(strings.isUrl("http://10.1.1.0")); + assert.isFalse(strings.isUrl("http://10.1.1.255")); + assert.isFalse(strings.isUrl("http://224.1.1.1")); + assert.isFalse(strings.isUrl("http://1.1.1.1.1")); + assert.isFalse(strings.isUrl("http://123.123.123")); + assert.isFalse(strings.isUrl("http://3628126748")); + assert.isFalse(strings.isUrl("http://.www.foo.bar/")); + assert.isFalse(strings.isUrl("http://www.foo.bar./")); + assert.isFalse(strings.isUrl("http://.www.foo.bar./")); + assert.isFalse(strings.isUrl("http://10.1.1.1")); + assert.isFalse(strings.isUrl("http://10.1.1.254")); }; exports.testIsFileName = function () {