Skip to content

Commit

Permalink
Merge #2545: [Util] Better URL validation
Browse files Browse the repository at this point in the history
13735da [QA] Add unit test for validateURL() (random-zebra)
a62f88a [Util] Use C++11 std::regex for validateURL() function (random-zebra)

Pull request description:

  We validate (user side) the URL of budget proposals before their submission via RPC, and [#soon](#2406) via the GUI.

  As noted by @NoobieDev12  in [this comment](#2406 (comment)), this URL validation is too limited. Essentially we are only checking that the string begins with the protocol.

  This PR changes the implementation of the function `validateURL()` , using C++11 `std::regex` (which was the original intention of #950, but discarded to avoid bumping the minimum supported ABI at the time).

  I've used the following pattern to match `^(https?)://[^\s/$.?#][^\s]*[^\s/.]\.[^\s/.][^\s]*[^\s.]$`, which is far from a complete validation, but it should cover the basic cases.
  We can refine it in the future if need be.

  Unit tests included.

ACKs for top commit:
  furszy:
    nice 👌 , utACK 13735da

Tree-SHA512: 436bd7eeacd80e528f8cabf4b87ad7435369801cd4050a136715bc982258ab7cecd2c32d0dd430d7e7b43e41729dc2660fbb8e8f4f9a796867ce5ad206df3e66
  • Loading branch information
random-zebra committed Sep 12, 2021
2 parents 1286d3e + 13735da commit 129446a
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 19 deletions.
32 changes: 32 additions & 0 deletions src/test/util_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -969,6 +969,38 @@ BOOST_AUTO_TEST_CASE(test_Capitalize)
BOOST_CHECK_EQUAL(Capitalize("\x00\xfe\xff"), "\x00\xfe\xff");
}

BOOST_AUTO_TEST_CASE(test_validateURL)
{
// Must pass
BOOST_CHECK(validateURL("http://foo.bar"));
BOOST_CHECK(validateURL("https://foo.bar"));
BOOST_CHECK(validateURL("https://foo.bar/"));
BOOST_CHECK(validateURL("http://pivx.foo.bar"));
BOOST_CHECK(validateURL("https://foo.bar/pivx"));
BOOST_CHECK(validateURL("https://foo.bar/pivx/more/"));
BOOST_CHECK(validateURL("https://142.2.3.1"));
BOOST_CHECK(validateURL("https://foo_bar.pivx.com"));
BOOST_CHECK(validateURL("http://foo.bar/?baz=some"));
BOOST_CHECK(validateURL("http://foo.bar/?baz=some&p=364"));

// Must fail
BOOST_CHECK(!validateURL("BlahBlah"));
BOOST_CHECK(!validateURL("foo.bar"));
BOOST_CHECK(!validateURL("://foo.bar"));
BOOST_CHECK(!validateURL("www.foo.bar"));
BOOST_CHECK(!validateURL("http://foo..bar"));
BOOST_CHECK(!validateURL("http:///foo.bar"));
BOOST_CHECK(!validateURL("http:// foo.bar"));
BOOST_CHECK(!validateURL("http://foo .bar"));
BOOST_CHECK(!validateURL("http://foo"));
BOOST_CHECK(!validateURL("http://foo.bar."));
BOOST_CHECK(!validateURL("http://foo.bar.."));
BOOST_CHECK(!validateURL("http://"));
BOOST_CHECK(!validateURL("http://.something.com"));
BOOST_CHECK(!validateURL("http://?something.com"));
BOOST_CHECK(!validateURL("https://foo.bar/?q=Spaces are not encoded"));
}

static void TestOtherThread(fs::path dirname, std::string lockname, bool *result)
{
*result = LockDirectory(dirname, lockname);
Expand Down
26 changes: 12 additions & 14 deletions src/utilstrencodings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include <cstring>
#include <errno.h>
#include <limits>
#include <regex>



Expand All @@ -37,27 +38,24 @@ std::string SanitizeString(const std::string& str, int rule)
return strResult;
}

bool validateURL(std::string strURL, std::string& strErr, unsigned int maxSize) {
bool validateURL(std::string strURL)
{
std::string strErr;
return validateURL(strURL, strErr);
}

bool validateURL(std::string strURL, std::string& strErr, unsigned int maxSize)
{
// Check URL size
if (strURL.size() > maxSize) {
strErr = strprintf("Invalid URL: %d exceeds limit of %d characters.", strURL.size(), maxSize);
return false;
}

std::vector<std::string> reqPre;

// Required initial strings; URL must contain one
reqPre.push_back("http://");
reqPre.push_back("https://");

// check fronts
bool found = false;
for (int i=0; i < (int) reqPre.size() && !found; i++) {
if (strURL.find(reqPre[i]) == 0) found = true;
}
if ((!found) && (reqPre.size() > 0)) {
strErr = "Invalid URL, check scheme (e.g. https://)";
// Validate URL
std::regex url_regex("^(https?)://[^\\s/$.?#][^\\s]*[^\\s/.]\\.[^\\s/.][^\\s]*[^\\s.]$");
if (!std::regex_match(strURL, url_regex)) {
strErr = "Invalid URL";
return false;
}

Expand Down
5 changes: 3 additions & 2 deletions src/utilstrencodings.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,12 @@ std::string SanitizeString(const std::string& str, int rule = SAFE_CHARS_DEFAULT
/**
* Check URL format for conformance for validity to a defined pattern
* @param[in] strURL The string to be processed for validity
* @param[in] stdErr A string that will be loaded with any validation error message
* @param[in] strErr A string that will be loaded with any validation error message
* @param[in] maxSize An unsigned int, defaulted to 64, to restrict the length
* @return A bool, true if valid, false if not (reason in stdErr)
* @return A bool, true if valid, false if not (reason in strErr)
*/
bool validateURL(std::string strURL, std::string& strErr, unsigned int maxSize = 64);
bool validateURL(std::string strURL);

std::vector<unsigned char> ParseHex(const char* psz);
std::vector<unsigned char> ParseHex(const std::string& str);
Expand Down
6 changes: 3 additions & 3 deletions test/functional/rpc_budget.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,16 +63,16 @@ def run_test(self):

self.log.info("Test without URL scheme")
scheme = ''
assert_raises_rpc_error(-8, "Invalid URL, check scheme (e.g. https://)", self.nodes[0].preparebudget, name, scheme + url, 1, nextsuperblock, address, 100)
assert_raises_rpc_error(-8, "Invalid URL", self.nodes[0].preparebudget, name, scheme + url, 1, nextsuperblock, address, 100)

self.log.info('Test with invalid URL scheme: ftp://')
scheme = 'ftp://'
assert_raises_rpc_error(-8, "Invalid URL, check scheme (e.g. https://)", self.nodes[0].preparebudget, name, scheme + url, 1, nextsuperblock, address, 100)
assert_raises_rpc_error(-8, "Invalid URL", self.nodes[0].preparebudget, name, scheme + url, 1, nextsuperblock, address, 100)

self.log.info("Test with invalid double character scheme: hhttps://")
scheme = 'hhttps://'
url = 'test.com'
assert_raises_rpc_error(-8, "Invalid URL, check scheme (e.g. https://)", self.nodes[0].preparebudget, name, scheme + url, 1, nextsuperblock, address, 100)
assert_raises_rpc_error(-8, "Invalid URL", self.nodes[0].preparebudget, name, scheme + url, 1, nextsuperblock, address, 100)

self.log.info("Test with valid scheme: http://")
name = 'testvalid1'
Expand Down

0 comments on commit 129446a

Please sign in to comment.