From a62f88aaf74cc9d7a2967cbc1e062f99609fddf3 Mon Sep 17 00:00:00 2001 From: random-zebra Date: Wed, 8 Sep 2021 19:25:29 +0200 Subject: [PATCH 1/2] [Util] Use C++11 std::regex for validateURL() function --- src/utilstrencodings.cpp | 26 ++++++++++++-------------- src/utilstrencodings.h | 5 +++-- test/functional/rpc_budget.py | 6 +++--- 3 files changed, 18 insertions(+), 19 deletions(-) diff --git a/src/utilstrencodings.cpp b/src/utilstrencodings.cpp index 087c8fb51e964..78c8aafe7a5d1 100644 --- a/src/utilstrencodings.cpp +++ b/src/utilstrencodings.cpp @@ -14,6 +14,7 @@ #include #include #include +#include @@ -37,27 +38,24 @@ std::string SanitizeString(const std::string& str, int rule) return strResult; } -bool validateURL(std::string strURL, std::string& strErr, unsigned int maxSize) { +bool validateURL(std::string strURL) +{ + std::string strErr; + return validateURL(strURL, strErr); +} +bool validateURL(std::string strURL, std::string& strErr, unsigned int maxSize) +{ // Check URL size if (strURL.size() > maxSize) { strErr = strprintf("Invalid URL: %d exceeds limit of %d characters.", strURL.size(), maxSize); return false; } - std::vector reqPre; - - // Required initial strings; URL must contain one - reqPre.push_back("http://"); - reqPre.push_back("https://"); - - // check fronts - bool found = false; - for (int i=0; i < (int) reqPre.size() && !found; i++) { - if (strURL.find(reqPre[i]) == 0) found = true; - } - if ((!found) && (reqPre.size() > 0)) { - strErr = "Invalid URL, check scheme (e.g. https://)"; + // Validate URL + std::regex url_regex("^(https?)://[^\\s/$.?#][^\\s]*[^\\s/.]\\.[^\\s/.][^\\s]*[^\\s.]$"); + if (!std::regex_match(strURL, url_regex)) { + strErr = "Invalid URL"; return false; } diff --git a/src/utilstrencodings.h b/src/utilstrencodings.h index 544c72a4ce322..766eaa507f320 100644 --- a/src/utilstrencodings.h +++ b/src/utilstrencodings.h @@ -45,11 +45,12 @@ std::string SanitizeString(const std::string& str, int rule = SAFE_CHARS_DEFAULT /** * Check URL format for conformance for validity to a defined pattern * @param[in] strURL The string to be processed for validity -* @param[in] stdErr A string that will be loaded with any validation error message +* @param[in] strErr A string that will be loaded with any validation error message * @param[in] maxSize An unsigned int, defaulted to 64, to restrict the length -* @return A bool, true if valid, false if not (reason in stdErr) +* @return A bool, true if valid, false if not (reason in strErr) */ bool validateURL(std::string strURL, std::string& strErr, unsigned int maxSize = 64); +bool validateURL(std::string strURL); std::vector ParseHex(const char* psz); std::vector ParseHex(const std::string& str); diff --git a/test/functional/rpc_budget.py b/test/functional/rpc_budget.py index 176c02b82d45e..120e170cc4e14 100755 --- a/test/functional/rpc_budget.py +++ b/test/functional/rpc_budget.py @@ -63,16 +63,16 @@ def run_test(self): self.log.info("Test without URL scheme") scheme = '' - assert_raises_rpc_error(-8, "Invalid URL, check scheme (e.g. https://)", self.nodes[0].preparebudget, name, scheme + url, 1, nextsuperblock, address, 100) + assert_raises_rpc_error(-8, "Invalid URL", self.nodes[0].preparebudget, name, scheme + url, 1, nextsuperblock, address, 100) self.log.info('Test with invalid URL scheme: ftp://') scheme = 'ftp://' - assert_raises_rpc_error(-8, "Invalid URL, check scheme (e.g. https://)", self.nodes[0].preparebudget, name, scheme + url, 1, nextsuperblock, address, 100) + assert_raises_rpc_error(-8, "Invalid URL", self.nodes[0].preparebudget, name, scheme + url, 1, nextsuperblock, address, 100) self.log.info("Test with invalid double character scheme: hhttps://") scheme = 'hhttps://' url = 'test.com' - assert_raises_rpc_error(-8, "Invalid URL, check scheme (e.g. https://)", self.nodes[0].preparebudget, name, scheme + url, 1, nextsuperblock, address, 100) + assert_raises_rpc_error(-8, "Invalid URL", self.nodes[0].preparebudget, name, scheme + url, 1, nextsuperblock, address, 100) self.log.info("Test with valid scheme: http://") name = 'testvalid1' From 13735dac1773a0765fe90bba53c377abb868efcc Mon Sep 17 00:00:00 2001 From: random-zebra Date: Wed, 8 Sep 2021 19:26:51 +0200 Subject: [PATCH 2/2] [QA] Add unit test for validateURL() --- src/test/util_tests.cpp | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/src/test/util_tests.cpp b/src/test/util_tests.cpp index f6052c6ecf21a..f39e9d9a28d0f 100644 --- a/src/test/util_tests.cpp +++ b/src/test/util_tests.cpp @@ -969,6 +969,38 @@ BOOST_AUTO_TEST_CASE(test_Capitalize) BOOST_CHECK_EQUAL(Capitalize("\x00\xfe\xff"), "\x00\xfe\xff"); } +BOOST_AUTO_TEST_CASE(test_validateURL) +{ + // Must pass + BOOST_CHECK(validateURL("http://foo.bar")); + BOOST_CHECK(validateURL("https://foo.bar")); + BOOST_CHECK(validateURL("https://foo.bar/")); + BOOST_CHECK(validateURL("http://pivx.foo.bar")); + BOOST_CHECK(validateURL("https://foo.bar/pivx")); + BOOST_CHECK(validateURL("https://foo.bar/pivx/more/")); + BOOST_CHECK(validateURL("https://142.2.3.1")); + BOOST_CHECK(validateURL("https://foo_bar.pivx.com")); + BOOST_CHECK(validateURL("http://foo.bar/?baz=some")); + BOOST_CHECK(validateURL("http://foo.bar/?baz=some&p=364")); + + // Must fail + BOOST_CHECK(!validateURL("BlahBlah")); + BOOST_CHECK(!validateURL("foo.bar")); + BOOST_CHECK(!validateURL("://foo.bar")); + BOOST_CHECK(!validateURL("www.foo.bar")); + BOOST_CHECK(!validateURL("http://foo..bar")); + BOOST_CHECK(!validateURL("http:///foo.bar")); + BOOST_CHECK(!validateURL("http:// foo.bar")); + BOOST_CHECK(!validateURL("http://foo .bar")); + BOOST_CHECK(!validateURL("http://foo")); + BOOST_CHECK(!validateURL("http://foo.bar.")); + BOOST_CHECK(!validateURL("http://foo.bar..")); + BOOST_CHECK(!validateURL("http://")); + BOOST_CHECK(!validateURL("http://.something.com")); + BOOST_CHECK(!validateURL("http://?something.com")); + BOOST_CHECK(!validateURL("https://foo.bar/?q=Spaces are not encoded")); +} + static void TestOtherThread(fs::path dirname, std::string lockname, bool *result) { *result = LockDirectory(dirname, lockname);