Skip to content

Commit

Permalink
Reject non-ASCII digits in HostAndPort port numbers.
Browse files Browse the repository at this point in the history
In `HostAndPort.fromString`, we previously allowed any Unicode digit. But the RFCs generally call for ASCII digits specifically.

Document that `InetAddresses.forString` and related methods _do_ allow non-ASCII digits and explain how to check for their presence. We found that a number of projects were testing that these digits work, so changing that seems risky.

Fixes #5681.
Fixes #5682.

Thanks to @Marcono1234 for the bug report.

RELNOTES=`net`: Port numbers spelled with non-ASCII digits are no longer allowed in `HostAndPort.fromString`.
PiperOrigin-RevId: 390396550
  • Loading branch information
eamonnmcmanus authored and Google Java Core Libraries committed Aug 12, 2021
1 parent 646727b commit 53fd1d7
Show file tree
Hide file tree
Showing 8 changed files with 138 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,13 @@ public void testFromStringUnusedDefaultPort() {
checkFromStringCase("[2001::2]:85", 77, "2001::2", 85, true);
}

public void testFromStringNonAsciiDigits() {
// Same as testFromStringUnusedDefaultPort but with Gujarati digits for port numbers.
checkFromStringCase("gmail.com:૮1", 77, null, -1, false);
checkFromStringCase("192.0.2.2:૮૩", 77, null, -1, false);
checkFromStringCase("[2001::2]:૮૫", 77, null, -1, false);
}

public void testFromStringBadPort() {
// Out-of-range ports.
checkFromStringCase("google.com:65536", 1, null, 99, false);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,20 @@ public void testForStringIPv4Input() throws UnknownHostException {
assertTrue(InetAddresses.isInetAddress(ipStr));
}

public void testForStringIPv4NonAsciiInput() throws UnknownHostException {
String ipStr = "૧૯૨.૧૬૮.૦.૧"; // 192.168.0.1 in Gujarati digits
// Shouldn't hit DNS, because it's an IP string literal.
InetAddress ipv4Addr;
try {
ipv4Addr = InetAddress.getByName(ipStr);
} catch (UnknownHostException e) {
// OK: this is probably Android, which is stricter.
return;
}
assertEquals(ipv4Addr, InetAddresses.forString(ipStr));
assertTrue(InetAddresses.isInetAddress(ipStr));
}

public void testForStringIPv6Input() throws UnknownHostException {
String ipStr = "3ffe::1";
// Shouldn't hit DNS, because it's an IP string literal.
Expand All @@ -143,6 +157,20 @@ public void testForStringIPv6Input() throws UnknownHostException {
assertTrue(InetAddresses.isInetAddress(ipStr));
}

public void testForStringIPv6NonAsciiInput() throws UnknownHostException {
String ipStr = "૩ffe::૧"; // 3ffe::1 with Gujarati digits for 3 and 1
// Shouldn't hit DNS, because it's an IP string literal.
InetAddress ipv6Addr;
try {
ipv6Addr = InetAddress.getByName(ipStr);
} catch (UnknownHostException e) {
// OK: this is probably Android, which is stricter.
return;
}
assertEquals(ipv6Addr, InetAddresses.forString(ipStr));
assertTrue(InetAddresses.isInetAddress(ipStr));
}

public void testForStringIPv6EightColons() throws UnknownHostException {
ImmutableSet<String> eightColons =
ImmutableSet.of("::7:6:5:4:3:2:1", "::7:6:5:4:3:2:0", "7:6:5:4:3:2:1::", "0:6:5:4:3:2:1::");
Expand Down
6 changes: 5 additions & 1 deletion android/guava/src/com/google/common/net/HostAndPort.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

import com.google.common.annotations.Beta;
import com.google.common.annotations.GwtCompatible;
import com.google.common.base.CharMatcher;
import com.google.common.base.Objects;
import com.google.common.base.Strings;
import com.google.errorprone.annotations.Immutable;
Expand Down Expand Up @@ -190,7 +191,10 @@ public static HostAndPort fromString(String hostPortString) {
if (!Strings.isNullOrEmpty(portString)) {
// Try to parse the whole port string as a number.
// JDK7 accepts leading plus signs. We don't want to.
checkArgument(!portString.startsWith("+"), "Unparseable port number: %s", hostPortString);
checkArgument(
!portString.startsWith("+") && CharMatcher.ascii().matchesAllOf(portString),
"Unparseable port number: %s",
hostPortString);
try {
port = Integer.parseInt(portString);
} catch (NumberFormatException e) {
Expand Down
34 changes: 29 additions & 5 deletions android/guava/src/com/google/common/net/InetAddresses.java
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,11 @@ private static Inet4Address getInet4Address(byte[] bytes) {
*
* <p>Anything after a {@code %} in an IPv6 address is ignored (assumed to be a Scope ID).
*
* <p>This method accepts non-ASCII digits, for example {@code "192.168.0.1"} (those are fullwidth
* characters). That is consistent with {@link InetAddress}, but not with various RFCs. If you
* want to accept ASCII digits only, you can use something like {@code
* CharMatcher.ascii().matchesAllOf(ipString)}.
*
* @param ipString {@code String} containing an IPv4 or IPv6 string literal, e.g. {@code
* "192.168.0.1"} or {@code "2001:db8::1"}
* @return {@link InetAddress} representing the argument
Expand All @@ -154,6 +159,11 @@ public static InetAddress forString(String ipString) {
* Returns {@code true} if the supplied string is a valid IP string literal, {@code false}
* otherwise.
*
* <p>This method accepts non-ASCII digits, for example {@code "192.168.0.1"} (those are fullwidth
* characters). That is consistent with {@link InetAddress}, but not with various RFCs. If you
* want to accept ASCII digits only, you can use something like {@code
* CharMatcher.ascii().matchesAllOf(ipString)}.
*
* @param ipString {@code String} to evaluated as an IP string literal
* @return {@code true} if the argument is a valid IP string literal
*/
Expand Down Expand Up @@ -502,10 +512,15 @@ public static String toUriString(InetAddress ip) {
* Returns an InetAddress representing the literal IPv4 or IPv6 host portion of a URL, encoded in
* the format specified by RFC 3986 section 3.2.2.
*
* <p>This function is similar to {@link InetAddresses#forString(String)}, however, it requires
* that IPv6 addresses are surrounded by square brackets.
* <p>This method is similar to {@link InetAddresses#forString(String)}, however, it requires that
* IPv6 addresses are surrounded by square brackets.
*
* <p>This method is the inverse of {@link InetAddresses#toUriString(java.net.InetAddress)}.
*
* <p>This function is the inverse of {@link InetAddresses#toUriString(java.net.InetAddress)}.
* <p>This method accepts non-ASCII digits, for example {@code "192.168.0.1"} (those are fullwidth
* characters). That is consistent with {@link InetAddress}, but not with various RFCs. If you
* want to accept ASCII digits only, you can use something like {@code
* CharMatcher.ascii().matchesAllOf(ipString)}.
*
* @param hostAddr A RFC 3986 section 3.2.2 encoded IPv4 or IPv6 address
* @return an InetAddress representing the address in {@code hostAddr}
Expand Down Expand Up @@ -549,6 +564,11 @@ private static InetAddress forUriStringNoThrow(String hostAddr) {
* Returns {@code true} if the supplied string is a valid URI IP string literal, {@code false}
* otherwise.
*
* <p>This method accepts non-ASCII digits, for example {@code "192.168.0.1"} (those are fullwidth
* characters). That is consistent with {@link InetAddress}, but not with various RFCs. If you
* want to accept ASCII digits only, you can use something like {@code
* CharMatcher.ascii().matchesAllOf(ipString)}.
*
* @param ipString {@code String} to evaluated as an IP URI host string literal
* @return {@code true} if the argument is a valid IP URI host
*/
Expand Down Expand Up @@ -844,6 +864,10 @@ public static Inet4Address getEmbeddedIPv4ClientAddress(Inet6Address ip) {
* obscure {@link Inet6Address} methods, but it would be unwise to depend on such a
* poorly-documented feature.)
*
* <p>This method accepts non-ASCII digits. That is consistent with {@link InetAddress}, but not
* with various RFCs. If you want to accept ASCII digits only, you can use something like {@code
* CharMatcher.ascii().matchesAllOf(ipString)}.
*
* @param ipString {@code String} to be examined for embedded IPv4-mapped IPv6 address format
* @return {@code true} if the argument is a valid "mapped" address
* @since 10.0
Expand Down Expand Up @@ -871,7 +895,7 @@ public static boolean isMappedIPv4Address(String ipString) {
*
* <p>HACK: As long as applications continue to use IPv4 addresses for indexing into tables,
* accounting, et cetera, it may be necessary to <b>coerce</b> IPv6 addresses into IPv4 addresses.
* This function does so by hashing 64 bits of the IPv6 address into {@code 224.0.0.0/3} (64 bits
* This method does so by hashing 64 bits of the IPv6 address into {@code 224.0.0.0/3} (64 bits
* into 29 bits):
*
* <ul>
Expand All @@ -881,7 +905,7 @@ public static boolean isMappedIPv4Address(String ipString) {
*
* <p>A "coerced" IPv4 address is equivalent to itself.
*
* <p>NOTE: This function is failsafe for security purposes: ALL IPv6 addresses (except localhost
* <p>NOTE: This method is failsafe for security purposes: ALL IPv6 addresses (except localhost
* (::1)) are hashed to avoid the security risk associated with extracting an embedded IPv4
* address that might permit elevated privileges.
*
Expand Down
7 changes: 7 additions & 0 deletions guava-tests/test/com/google/common/net/HostAndPortTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,13 @@ public void testFromStringUnusedDefaultPort() {
checkFromStringCase("[2001::2]:85", 77, "2001::2", 85, true);
}

public void testFromStringNonAsciiDigits() {
// Same as testFromStringUnusedDefaultPort but with Gujarati digits for port numbers.
checkFromStringCase("gmail.com:૮1", 77, null, -1, false);
checkFromStringCase("192.0.2.2:૮૩", 77, null, -1, false);
checkFromStringCase("[2001::2]:૮૫", 77, null, -1, false);
}

public void testFromStringBadPort() {
// Out-of-range ports.
checkFromStringCase("google.com:65536", 1, null, 99, false);
Expand Down
28 changes: 28 additions & 0 deletions guava-tests/test/com/google/common/net/InetAddressesTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,20 @@ public void testForStringIPv4Input() throws UnknownHostException {
assertTrue(InetAddresses.isInetAddress(ipStr));
}

public void testForStringIPv4NonAsciiInput() throws UnknownHostException {
String ipStr = "૧૯૨.૧૬૮.૦.૧"; // 192.168.0.1 in Gujarati digits
// Shouldn't hit DNS, because it's an IP string literal.
InetAddress ipv4Addr;
try {
ipv4Addr = InetAddress.getByName(ipStr);
} catch (UnknownHostException e) {
// OK: this is probably Android, which is stricter.
return;
}
assertEquals(ipv4Addr, InetAddresses.forString(ipStr));
assertTrue(InetAddresses.isInetAddress(ipStr));
}

public void testForStringIPv6Input() throws UnknownHostException {
String ipStr = "3ffe::1";
// Shouldn't hit DNS, because it's an IP string literal.
Expand All @@ -143,6 +157,20 @@ public void testForStringIPv6Input() throws UnknownHostException {
assertTrue(InetAddresses.isInetAddress(ipStr));
}

public void testForStringIPv6NonAsciiInput() throws UnknownHostException {
String ipStr = "૩ffe::૧"; // 3ffe::1 with Gujarati digits for 3 and 1
// Shouldn't hit DNS, because it's an IP string literal.
InetAddress ipv6Addr;
try {
ipv6Addr = InetAddress.getByName(ipStr);
} catch (UnknownHostException e) {
// OK: this is probably Android, which is stricter.
return;
}
assertEquals(ipv6Addr, InetAddresses.forString(ipStr));
assertTrue(InetAddresses.isInetAddress(ipStr));
}

public void testForStringIPv6EightColons() throws UnknownHostException {
ImmutableSet<String> eightColons =
ImmutableSet.of("::7:6:5:4:3:2:1", "::7:6:5:4:3:2:0", "7:6:5:4:3:2:1::", "0:6:5:4:3:2:1::");
Expand Down
6 changes: 5 additions & 1 deletion guava/src/com/google/common/net/HostAndPort.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

import com.google.common.annotations.Beta;
import com.google.common.annotations.GwtCompatible;
import com.google.common.base.CharMatcher;
import com.google.common.base.Objects;
import com.google.common.base.Strings;
import com.google.errorprone.annotations.Immutable;
Expand Down Expand Up @@ -190,7 +191,10 @@ public static HostAndPort fromString(String hostPortString) {
if (!Strings.isNullOrEmpty(portString)) {
// Try to parse the whole port string as a number.
// JDK7 accepts leading plus signs. We don't want to.
checkArgument(!portString.startsWith("+"), "Unparseable port number: %s", hostPortString);
checkArgument(
!portString.startsWith("+") && CharMatcher.ascii().matchesAllOf(portString),
"Unparseable port number: %s",
hostPortString);
try {
port = Integer.parseInt(portString);
} catch (NumberFormatException e) {
Expand Down
34 changes: 29 additions & 5 deletions guava/src/com/google/common/net/InetAddresses.java
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,11 @@ private static Inet4Address getInet4Address(byte[] bytes) {
*
* <p>Anything after a {@code %} in an IPv6 address is ignored (assumed to be a Scope ID).
*
* <p>This method accepts non-ASCII digits, for example {@code "192.168.0.1"} (those are fullwidth
* characters). That is consistent with {@link InetAddress}, but not with various RFCs. If you
* want to accept ASCII digits only, you can use something like {@code
* CharMatcher.ascii().matchesAllOf(ipString)}.
*
* @param ipString {@code String} containing an IPv4 or IPv6 string literal, e.g. {@code
* "192.168.0.1"} or {@code "2001:db8::1"}
* @return {@link InetAddress} representing the argument
Expand All @@ -154,6 +159,11 @@ public static InetAddress forString(String ipString) {
* Returns {@code true} if the supplied string is a valid IP string literal, {@code false}
* otherwise.
*
* <p>This method accepts non-ASCII digits, for example {@code "192.168.0.1"} (those are fullwidth
* characters). That is consistent with {@link InetAddress}, but not with various RFCs. If you
* want to accept ASCII digits only, you can use something like {@code
* CharMatcher.ascii().matchesAllOf(ipString)}.
*
* @param ipString {@code String} to evaluated as an IP string literal
* @return {@code true} if the argument is a valid IP string literal
*/
Expand Down Expand Up @@ -502,10 +512,15 @@ public static String toUriString(InetAddress ip) {
* Returns an InetAddress representing the literal IPv4 or IPv6 host portion of a URL, encoded in
* the format specified by RFC 3986 section 3.2.2.
*
* <p>This function is similar to {@link InetAddresses#forString(String)}, however, it requires
* that IPv6 addresses are surrounded by square brackets.
* <p>This method is similar to {@link InetAddresses#forString(String)}, however, it requires that
* IPv6 addresses are surrounded by square brackets.
*
* <p>This method is the inverse of {@link InetAddresses#toUriString(java.net.InetAddress)}.
*
* <p>This function is the inverse of {@link InetAddresses#toUriString(java.net.InetAddress)}.
* <p>This method accepts non-ASCII digits, for example {@code "192.168.0.1"} (those are fullwidth
* characters). That is consistent with {@link InetAddress}, but not with various RFCs. If you
* want to accept ASCII digits only, you can use something like {@code
* CharMatcher.ascii().matchesAllOf(ipString)}.
*
* @param hostAddr A RFC 3986 section 3.2.2 encoded IPv4 or IPv6 address
* @return an InetAddress representing the address in {@code hostAddr}
Expand Down Expand Up @@ -549,6 +564,11 @@ private static InetAddress forUriStringNoThrow(String hostAddr) {
* Returns {@code true} if the supplied string is a valid URI IP string literal, {@code false}
* otherwise.
*
* <p>This method accepts non-ASCII digits, for example {@code "192.168.0.1"} (those are fullwidth
* characters). That is consistent with {@link InetAddress}, but not with various RFCs. If you
* want to accept ASCII digits only, you can use something like {@code
* CharMatcher.ascii().matchesAllOf(ipString)}.
*
* @param ipString {@code String} to evaluated as an IP URI host string literal
* @return {@code true} if the argument is a valid IP URI host
*/
Expand Down Expand Up @@ -844,6 +864,10 @@ public static Inet4Address getEmbeddedIPv4ClientAddress(Inet6Address ip) {
* obscure {@link Inet6Address} methods, but it would be unwise to depend on such a
* poorly-documented feature.)
*
* <p>This method accepts non-ASCII digits. That is consistent with {@link InetAddress}, but not
* with various RFCs. If you want to accept ASCII digits only, you can use something like {@code
* CharMatcher.ascii().matchesAllOf(ipString)}.
*
* @param ipString {@code String} to be examined for embedded IPv4-mapped IPv6 address format
* @return {@code true} if the argument is a valid "mapped" address
* @since 10.0
Expand Down Expand Up @@ -871,7 +895,7 @@ public static boolean isMappedIPv4Address(String ipString) {
*
* <p>HACK: As long as applications continue to use IPv4 addresses for indexing into tables,
* accounting, et cetera, it may be necessary to <b>coerce</b> IPv6 addresses into IPv4 addresses.
* This function does so by hashing 64 bits of the IPv6 address into {@code 224.0.0.0/3} (64 bits
* This method does so by hashing 64 bits of the IPv6 address into {@code 224.0.0.0/3} (64 bits
* into 29 bits):
*
* <ul>
Expand All @@ -881,7 +905,7 @@ public static boolean isMappedIPv4Address(String ipString) {
*
* <p>A "coerced" IPv4 address is equivalent to itself.
*
* <p>NOTE: This function is failsafe for security purposes: ALL IPv6 addresses (except localhost
* <p>NOTE: This method is failsafe for security purposes: ALL IPv6 addresses (except localhost
* (::1)) are hashed to avoid the security risk associated with extracting an embedded IPv4
* address that might permit elevated privileges.
*
Expand Down

0 comments on commit 53fd1d7

Please sign in to comment.