diff --git a/docs/root/version_history/current.rst b/docs/root/version_history/current.rst index 40677507e5b8..660487b53e0f 100644 --- a/docs/root/version_history/current.rst +++ b/docs/root/version_history/current.rst @@ -4,4 +4,5 @@ Changes ------- +* http: fixed URL parsing for HTTP/1.1 fully qualified URLs and connect requests containing IPv6 addresses. * http: fixed bugs in datadog and squash filter's handling of responses with no bodies. diff --git a/source/common/http/utility.cc b/source/common/http/utility.cc index 204743ff52f6..3c0176536805 100644 --- a/source/common/http/utility.cc +++ b/source/common/http/utility.cc @@ -207,6 +207,30 @@ namespace Http { static const char kDefaultPath[] = "/"; +// If http_parser encounters an IP address [address] as the host it will set the offset and +// length to point to 'address' rather than '[address]'. Fix this by adjusting the offset +// and length to include the brackets. +// @param absolute_url the absolute URL. This is usually of the form // http://host/path +// but may be host:port for CONNECT requests +// @param offset the offset for the first character of the host. For IPv6 hosts +// this will point to the first character inside the brackets and will be +// adjusted to point at the brackets +// @param len the length of the host-and-port field. For IPv6 hosts this will +// not include the brackets and will be adjusted to do so. +bool maybeAdjustForIpv6(absl::string_view absolute_url, uint64_t& offset, uint64_t& len) { + // According to https://tools.ietf.org/html/rfc3986#section-3.2.2 the only way a hostname + // may begin with '[' is if it's an ipv6 address. + if (offset == 0 || *(absolute_url.data() + offset - 1) != '[') { + return false; + } + // Start one character sooner and end one character later. + offset--; + len += 2; + // HTTP parser ensures that any [ has a closing ] + ASSERT(absolute_url.length() >= offset + len); + return true; +} + bool Utility::Url::initialize(absl::string_view absolute_url, bool is_connect) { struct http_parser_url u; http_parser_url_init(&u); @@ -223,20 +247,27 @@ bool Utility::Url::initialize(absl::string_view absolute_url, bool is_connect) { scheme_ = absl::string_view(absolute_url.data() + u.field_data[UF_SCHEMA].off, u.field_data[UF_SCHEMA].len); - uint16_t authority_len = u.field_data[UF_HOST].len; + uint64_t authority_len = u.field_data[UF_HOST].len; if ((u.field_set & (1 << UF_PORT)) == (1 << UF_PORT)) { authority_len = authority_len + u.field_data[UF_PORT].len + 1; } - host_and_port_ = - absl::string_view(absolute_url.data() + u.field_data[UF_HOST].off, authority_len); + + uint64_t authority_beginning = u.field_data[UF_HOST].off; + const bool is_ipv6 = maybeAdjustForIpv6(absolute_url, authority_beginning, authority_len); + host_and_port_ = absl::string_view(absolute_url.data() + authority_beginning, authority_len); + if (is_ipv6 && !parseAuthority(host_and_port_).is_ip_address_) { + return false; + } // RFC allows the absolute-uri to not end in /, but the absolute path form - // must start with - uint64_t path_len = absolute_url.length() - (u.field_data[UF_HOST].off + hostAndPort().length()); - if (path_len > 0) { - uint64_t path_beginning = u.field_data[UF_HOST].off + hostAndPort().length(); - path_and_query_params_ = absl::string_view(absolute_url.data() + path_beginning, path_len); + // must start with. Determine if there's a non-zero path, and if so determine + // the length of the path, query params etc. + uint64_t path_etc_len = absolute_url.length() - (authority_beginning + hostAndPort().length()); + if (path_etc_len > 0) { + uint64_t path_beginning = authority_beginning + hostAndPort().length(); + path_and_query_params_ = absl::string_view(absolute_url.data() + path_beginning, path_etc_len); } else if (!is_connect) { + ASSERT((u.field_set & (1 << UF_PATH)) == 0); path_and_query_params_ = absl::string_view(kDefaultPath, 1); } return true; diff --git a/test/common/http/utility_test.cc b/test/common/http/utility_test.cc index 8751acddb024..472381c7d9c2 100644 --- a/test/common/http/utility_test.cc +++ b/test/common/http/utility_test.cc @@ -1090,6 +1090,9 @@ TEST(Url, ParsingFails) { EXPECT_FALSE(url.initialize("random_scheme://host.com/path", false)); EXPECT_FALSE(url.initialize("http://www.foo.com", true)); EXPECT_FALSE(url.initialize("foo.com", true)); + EXPECT_FALSE(url.initialize("http://[notaddress]:80/?query=param", false)); + EXPECT_FALSE(url.initialize("http://[1::z::2]:80/?query=param", false)); + EXPECT_FALSE(url.initialize("http://1.2.3.4:65536/?query=param", false)); } void validateUrl(absl::string_view raw_url, absl::string_view expected_scheme, @@ -1101,12 +1104,17 @@ void validateUrl(absl::string_view raw_url, absl::string_view expected_scheme, EXPECT_EQ(url.pathAndQueryParams(), expected_path); } -void validateConnectUrl(absl::string_view raw_url, absl::string_view expected_host_port) { +void validateConnectUrl(absl::string_view raw_url) { Utility::Url url; ASSERT_TRUE(url.initialize(raw_url, true)) << "Failed to initialize " << raw_url; EXPECT_TRUE(url.scheme().empty()); EXPECT_TRUE(url.pathAndQueryParams().empty()); - EXPECT_EQ(url.hostAndPort(), expected_host_port); + EXPECT_EQ(url.hostAndPort(), raw_url); +} + +void invalidConnectUrl(absl::string_view raw_url) { + Utility::Url url; + ASSERT_FALSE(url.initialize(raw_url, true)) << "Unexpectedly initialized " << raw_url; } TEST(Url, ParsingTest) { @@ -1141,6 +1149,14 @@ TEST(Url, ParsingTest) { validateUrl("http://www.host.com:80/?query=param", "http", "www.host.com:80", "/?query=param"); validateUrl("http://www.host.com/?query=param", "http", "www.host.com", "/?query=param"); + // Test with an ipv4 host address. + validateUrl("http://1.2.3.4/?query=param", "http", "1.2.3.4", "/?query=param"); + validateUrl("http://1.2.3.4:80/?query=param", "http", "1.2.3.4:80", "/?query=param"); + + // Test with an ipv6 address + validateUrl("http://[1::2:3]/?query=param", "http", "[1::2:3]", "/?query=param"); + validateUrl("http://[1::2:3]:80/?query=param", "http", "[1::2:3]:80", "/?query=param"); + // Test url with query parameter but without slash validateUrl("http://www.host.com:80?query=param", "http", "www.host.com:80", "?query=param"); validateUrl("http://www.host.com?query=param", "http", "www.host.com", "?query=param"); @@ -1163,8 +1179,16 @@ TEST(Url, ParsingTest) { } TEST(Url, ParsingForConnectTest) { - validateConnectUrl("host.com:443", "host.com:443"); - validateConnectUrl("host.com:80", "host.com:80"); + validateConnectUrl("host.com:443"); + validateConnectUrl("host.com:80"); + validateConnectUrl("1.2.3.4:80"); + validateConnectUrl("[1:2::3:4]:80"); + + invalidConnectUrl("[::12345678]:80"); + invalidConnectUrl("[1:1:1:1:1:1:1:1:1:1:1:1:1:1:1:1:1:1:1:1:1:1:1:1:1:1:1:1:1]:80"); + invalidConnectUrl("[1:1]:80"); + invalidConnectUrl("[:::]:80"); + invalidConnectUrl("[::1::]:80"); } void validatePercentEncodingEncodeDecode(absl::string_view source, diff --git a/test/integration/integration_test.cc b/test/integration/integration_test.cc index 3d90d5be067c..02d784476fb7 100644 --- a/test/integration/integration_test.cc +++ b/test/integration/integration_test.cc @@ -777,6 +777,41 @@ TEST_P(IntegrationTest, AbsolutePath) { EXPECT_FALSE(response.find("HTTP/1.1 404 Not Found\r\n") == 0); } +// Make that both IPv4 and IPv6 hosts match when using relative and absolute URLs. +TEST_P(IntegrationTest, TestHostWithAddress) { + useAccessLog("%REQ(Host)%\n"); + std::string address_string; + if (GetParam() == Network::Address::IpVersion::v4) { + address_string = TestUtility::getIpv4Loopback(); + } else { + address_string = "[::1]"; + } + + auto host = config_helper_.createVirtualHost(address_string.c_str(), "/"); + host.set_require_tls(envoy::config::route::v3::VirtualHost::ALL); + config_helper_.addVirtualHost(host); + + initialize(); + std::string response; + + // Test absolute URL with ipv6. + sendRawHttpAndWaitForResponse( + lookupPort("http"), absl::StrCat("GET http://", address_string, " HTTP/1.1\r\n\r\n").c_str(), + &response, true); + EXPECT_FALSE(response.find("HTTP/1.1 404 Not Found\r\n") == 0); + EXPECT_TRUE(response.find("301") != std::string::npos); + EXPECT_THAT(waitForAccessLog(access_log_name_), HasSubstr(address_string)); + + // Test normal IPv6 request as well. + response.clear(); + sendRawHttpAndWaitForResponse( + lookupPort("http"), + absl::StrCat("GET / HTTP/1.1\r\nHost: ", address_string, "\r\n\r\n").c_str(), &response, + true); + EXPECT_FALSE(response.find("HTTP/1.1 404 Not Found\r\n") == 0); + EXPECT_TRUE(response.find("301") != std::string::npos); +} + TEST_P(IntegrationTest, AbsolutePathWithPort) { // Configure www.namewithport.com:1234 to send a redirect, and ensure the redirect is // encountered via absolute URL with a port. @@ -789,6 +824,7 @@ TEST_P(IntegrationTest, AbsolutePathWithPort) { lookupPort("http"), "GET http://www.namewithport.com:1234 HTTP/1.1\r\nHost: host\r\n\r\n", &response, true); EXPECT_FALSE(response.find("HTTP/1.1 404 Not Found\r\n") == 0); + EXPECT_TRUE(response.find("301") != std::string::npos); } TEST_P(IntegrationTest, AbsolutePathWithoutPort) {