From 9c1e5dea4b980202ba003b90fcb64183d42031b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89amonn=20McManus?= Date: Wed, 24 Aug 2022 13:40:56 -0700 Subject: [PATCH] Add `BaseEncoding.ignoreCase()`. Calling `baseEncoding.ignoreCase()` returns a new `BaseEncoding` instance equivalent to `baseEncoding` except that it accepts either case when decoding. When *en*coding it continues to use whatever case the original `baseEncoding` used. RELNOTES=`BaseEncoding` acquires a new `ignoreCase()` method to support case-insensitive decoding. PiperOrigin-RevId: 469812601 --- .../google/common/io/BaseEncodingTest.java | 51 +++++++++++- .../com/google/common/io/BaseEncoding.java | 77 +++++++++++++++++-- .../google/common/io/BaseEncodingTest.java | 51 +++++++++++- .../com/google/common/io/BaseEncoding.java | 77 +++++++++++++++++-- 4 files changed, 236 insertions(+), 20 deletions(-) diff --git a/android/guava-tests/test/com/google/common/io/BaseEncodingTest.java b/android/guava-tests/test/com/google/common/io/BaseEncodingTest.java index 896f6cf72859..bb06a0176df9 100644 --- a/android/guava-tests/test/com/google/common/io/BaseEncodingTest.java +++ b/android/guava-tests/test/com/google/common/io/BaseEncodingTest.java @@ -125,7 +125,6 @@ public void testBase64CannotUpperCase() { base64().upperCase(); fail(); } catch (IllegalStateException expected) { - // success } } @@ -134,7 +133,14 @@ public void testBase64CannotLowerCase() { base64().lowerCase(); fail(); } catch (IllegalStateException expected) { - // success + } + } + + public void testBase64CannotIgnoreCase() { + try { + base64().ignoreCase(); + fail(); + } catch (IllegalStateException expected) { } } @@ -265,6 +271,18 @@ public void testBase32UpperCaseIsNoOp() { assertThat(base32().upperCase()).isSameInstanceAs(base32()); } + public void testBase32LowerCase() { + testEncodingWithCasing(base32().lowerCase(), "foobar", "mzxw6ytboi======"); + } + + public void testBase32IgnoreCase() { + BaseEncoding ignoreCase = base32().ignoreCase(); + assertThat(ignoreCase).isNotSameInstanceAs(base32()); + assertThat(ignoreCase).isSameInstanceAs(base32().ignoreCase()); + testDecodes(ignoreCase, "MZXW6YTBOI======", "foobar"); + testDecodes(ignoreCase, "mzxw6ytboi======", "foobar"); + } + public void testBase32Offset() { testEncodesWithOffset(base32(), "foobar", 0, 6, "MZXW6YTBOI======"); testEncodesWithOffset(base32(), "foobar", 1, 5, "N5XWEYLS"); @@ -335,6 +353,33 @@ public void testBase16UpperCaseIsNoOp() { assertThat(base16().upperCase()).isSameInstanceAs(base16()); } + public void testBase16LowerCase() { + BaseEncoding lowerCase = base16().lowerCase(); + assertThat(lowerCase).isNotSameInstanceAs(base16()); + assertThat(lowerCase).isSameInstanceAs(base16().lowerCase()); + testEncodingWithCasing(lowerCase, "foobar", "666f6f626172"); + } + + public void testBase16IgnoreCase() { + BaseEncoding ignoreCase = base16().ignoreCase(); + assertThat(ignoreCase).isNotSameInstanceAs(base16()); + assertThat(ignoreCase).isSameInstanceAs(base16().ignoreCase()); + testEncodingWithCasing(ignoreCase, "foobar", "666F6F626172"); + testDecodes(ignoreCase, "666F6F626172", "foobar"); + testDecodes(ignoreCase, "666f6f626172", "foobar"); + testDecodes(ignoreCase, "666F6f626172", "foobar"); + } + + public void testBase16LowerCaseIgnoreCase() { + BaseEncoding ignoreCase = base16().lowerCase().ignoreCase(); + assertThat(ignoreCase).isNotSameInstanceAs(base16()); + assertThat(ignoreCase).isSameInstanceAs(base16().lowerCase().ignoreCase()); + testEncodingWithCasing(ignoreCase, "foobar", "666f6f626172"); + testDecodes(ignoreCase, "666F6F626172", "foobar"); + testDecodes(ignoreCase, "666f6f626172", "foobar"); + testDecodes(ignoreCase, "666F6f626172", "foobar"); + } + public void testBase16InvalidDecodings() { // These contain bytes not in the decodabet. assertFailsToDecode(base16(), "\n\n", "Unrecognized character: 0xa"); @@ -344,6 +389,8 @@ public void testBase16InvalidDecodings() { assertFailsToDecode(base16(), "ABC"); // These have a combination of invalid length and unrecognized characters. assertFailsToDecode(base16(), "?", "Invalid input length 1"); + assertFailsToDecode(base16(), "ab"); + assertFailsToDecode(base16().lowerCase(), "AB"); } public void testBase16Offset() { diff --git a/android/guava/src/com/google/common/io/BaseEncoding.java b/android/guava/src/com/google/common/io/BaseEncoding.java index 0d6f2e068068..5e4d9b8c3322 100644 --- a/android/guava/src/com/google/common/io/BaseEncoding.java +++ b/android/guava/src/com/google/common/io/BaseEncoding.java @@ -318,6 +318,15 @@ CharSequence trimTrailingPadding(CharSequence chars) { */ public abstract BaseEncoding lowerCase(); + /** + * Returns an encoding that behaves equivalently to this encoding, but decodes letters without + * regard to case. + * + * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and + * lower-case characters + */ + public abstract BaseEncoding ignoreCase(); + private static final BaseEncoding BASE64 = new Base64Encoding( "base64()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", '='); @@ -428,8 +437,13 @@ private static final class Alphabet { final int bytesPerChunk; private final byte[] decodabet; private final boolean[] validPadding; + private final boolean ignoreCase; Alphabet(String name, char[] chars) { + this(name, chars, decodabetFor(chars), /* ignoreCase= */ false); + } + + private Alphabet(String name, char[] chars, byte[] decodabet, boolean ignoreCase) { this.name = checkNotNull(name); this.chars = checkNotNull(chars); try { @@ -452,6 +466,17 @@ private static final class Alphabet { this.mask = chars.length - 1; + this.decodabet = decodabet; + + boolean[] validPadding = new boolean[charsPerChunk]; + for (int i = 0; i < bytesPerChunk; i++) { + validPadding[divide(i * 8, bitsPerChar, CEILING)] = true; + } + this.validPadding = validPadding; + this.ignoreCase = ignoreCase; + } + + private static byte[] decodabetFor(char[] chars) { byte[] decodabet = new byte[Ascii.MAX + 1]; Arrays.fill(decodabet, (byte) -1); for (int i = 0; i < chars.length; i++) { @@ -460,13 +485,33 @@ private static final class Alphabet { checkArgument(decodabet[c] == -1, "Duplicate character: %s", c); decodabet[c] = (byte) i; } - this.decodabet = decodabet; + return decodabet; + } - boolean[] validPadding = new boolean[charsPerChunk]; - for (int i = 0; i < bytesPerChunk; i++) { - validPadding[divide(i * 8, bitsPerChar, CEILING)] = true; + /** Returns an equivalent {@code Alphabet} except it ignores case. */ + Alphabet ignoreCase() { + if (ignoreCase) { + return this; } - this.validPadding = validPadding; + + // We can't use .clone() because of GWT. + byte[] newDecodabet = Arrays.copyOf(decodabet, decodabet.length); + for (int upper = 'A'; upper <= 'Z'; upper++) { + int lower = upper | 0x20; + byte decodeUpper = decodabet[upper]; + byte decodeLower = decodabet[lower]; + if (decodeUpper == -1) { + newDecodabet[upper] = decodeLower; + } else { + checkState( + decodeLower == -1, + "Can't ignoreCase() since '%s' and '%s' encode different values", + (char) upper, + (char) lower); + newDecodabet[lower] = decodeUpper; + } + } + return new Alphabet(name + ".ignoreCase()", chars, newDecodabet, /* ignoreCase= */ true); } char encode(int bits) { @@ -551,14 +596,14 @@ public String toString() { public boolean equals(@CheckForNull Object other) { if (other instanceof Alphabet) { Alphabet that = (Alphabet) other; - return Arrays.equals(this.chars, that.chars); + return this.ignoreCase == that.ignoreCase && Arrays.equals(this.chars, that.chars); } return false; } @Override public int hashCode() { - return Arrays.hashCode(chars); + return Arrays.hashCode(chars) + (ignoreCase ? 1231 : 1237); } } @@ -832,6 +877,7 @@ public BaseEncoding withSeparator(String separator, int afterEveryChars) { @LazyInit @CheckForNull private transient BaseEncoding upperCase; @LazyInit @CheckForNull private transient BaseEncoding lowerCase; + @LazyInit @CheckForNull private transient BaseEncoding ignoreCase; @Override public BaseEncoding upperCase() { @@ -853,6 +899,16 @@ public BaseEncoding lowerCase() { return result; } + @Override + public BaseEncoding ignoreCase() { + BaseEncoding result = ignoreCase; + if (result == null) { + Alphabet ignore = alphabet.ignoreCase(); + result = ignoreCase = (ignore == alphabet) ? this : newInstance(ignore, paddingChar); + } + return result; + } + BaseEncoding newInstance(Alphabet alphabet, @CheckForNull Character paddingChar) { return new StandardBaseEncoding(alphabet, paddingChar); } @@ -860,7 +916,7 @@ BaseEncoding newInstance(Alphabet alphabet, @CheckForNull Character paddingChar) @Override public String toString() { StringBuilder builder = new StringBuilder("BaseEncoding."); - builder.append(alphabet.toString()); + builder.append(alphabet); if (8 % alphabet.bitsPerChar != 0) { if (paddingChar == null) { builder.append(".omitPadding()"); @@ -1170,6 +1226,11 @@ public BaseEncoding lowerCase() { return delegate.lowerCase().withSeparator(separator, afterEveryChars); } + @Override + public BaseEncoding ignoreCase() { + return delegate.ignoreCase().withSeparator(separator, afterEveryChars); + } + @Override public String toString() { return delegate + ".withSeparator(\"" + separator + "\", " + afterEveryChars + ")"; diff --git a/guava-tests/test/com/google/common/io/BaseEncodingTest.java b/guava-tests/test/com/google/common/io/BaseEncodingTest.java index 50b30506bd12..7ebb661e867e 100644 --- a/guava-tests/test/com/google/common/io/BaseEncodingTest.java +++ b/guava-tests/test/com/google/common/io/BaseEncodingTest.java @@ -125,7 +125,6 @@ public void testBase64CannotUpperCase() { base64().upperCase(); fail(); } catch (IllegalStateException expected) { - // success } } @@ -134,7 +133,14 @@ public void testBase64CannotLowerCase() { base64().lowerCase(); fail(); } catch (IllegalStateException expected) { - // success + } + } + + public void testBase64CannotIgnoreCase() { + try { + base64().ignoreCase(); + fail(); + } catch (IllegalStateException expected) { } } @@ -265,6 +271,18 @@ public void testBase32UpperCaseIsNoOp() { assertThat(base32().upperCase()).isSameInstanceAs(base32()); } + public void testBase32LowerCase() { + testEncodingWithCasing(base32().lowerCase(), "foobar", "mzxw6ytboi======"); + } + + public void testBase32IgnoreCase() { + BaseEncoding ignoreCase = base32().ignoreCase(); + assertThat(ignoreCase).isNotSameInstanceAs(base32()); + assertThat(ignoreCase).isSameInstanceAs(base32().ignoreCase()); + testDecodes(ignoreCase, "MZXW6YTBOI======", "foobar"); + testDecodes(ignoreCase, "mzxw6ytboi======", "foobar"); + } + public void testBase32Offset() { testEncodesWithOffset(base32(), "foobar", 0, 6, "MZXW6YTBOI======"); testEncodesWithOffset(base32(), "foobar", 1, 5, "N5XWEYLS"); @@ -335,6 +353,33 @@ public void testBase16UpperCaseIsNoOp() { assertThat(base16().upperCase()).isSameInstanceAs(base16()); } + public void testBase16LowerCase() { + BaseEncoding lowerCase = base16().lowerCase(); + assertThat(lowerCase).isNotSameInstanceAs(base16()); + assertThat(lowerCase).isSameInstanceAs(base16().lowerCase()); + testEncodingWithCasing(lowerCase, "foobar", "666f6f626172"); + } + + public void testBase16IgnoreCase() { + BaseEncoding ignoreCase = base16().ignoreCase(); + assertThat(ignoreCase).isNotSameInstanceAs(base16()); + assertThat(ignoreCase).isSameInstanceAs(base16().ignoreCase()); + testEncodingWithCasing(ignoreCase, "foobar", "666F6F626172"); + testDecodes(ignoreCase, "666F6F626172", "foobar"); + testDecodes(ignoreCase, "666f6f626172", "foobar"); + testDecodes(ignoreCase, "666F6f626172", "foobar"); + } + + public void testBase16LowerCaseIgnoreCase() { + BaseEncoding ignoreCase = base16().lowerCase().ignoreCase(); + assertThat(ignoreCase).isNotSameInstanceAs(base16()); + assertThat(ignoreCase).isSameInstanceAs(base16().lowerCase().ignoreCase()); + testEncodingWithCasing(ignoreCase, "foobar", "666f6f626172"); + testDecodes(ignoreCase, "666F6F626172", "foobar"); + testDecodes(ignoreCase, "666f6f626172", "foobar"); + testDecodes(ignoreCase, "666F6f626172", "foobar"); + } + public void testBase16InvalidDecodings() { // These contain bytes not in the decodabet. assertFailsToDecode(base16(), "\n\n", "Unrecognized character: 0xa"); @@ -344,6 +389,8 @@ public void testBase16InvalidDecodings() { assertFailsToDecode(base16(), "ABC"); // These have a combination of invalid length and unrecognized characters. assertFailsToDecode(base16(), "?", "Invalid input length 1"); + assertFailsToDecode(base16(), "ab"); + assertFailsToDecode(base16().lowerCase(), "AB"); } public void testBase16Offset() { diff --git a/guava/src/com/google/common/io/BaseEncoding.java b/guava/src/com/google/common/io/BaseEncoding.java index 0d6f2e068068..5e4d9b8c3322 100644 --- a/guava/src/com/google/common/io/BaseEncoding.java +++ b/guava/src/com/google/common/io/BaseEncoding.java @@ -318,6 +318,15 @@ CharSequence trimTrailingPadding(CharSequence chars) { */ public abstract BaseEncoding lowerCase(); + /** + * Returns an encoding that behaves equivalently to this encoding, but decodes letters without + * regard to case. + * + * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and + * lower-case characters + */ + public abstract BaseEncoding ignoreCase(); + private static final BaseEncoding BASE64 = new Base64Encoding( "base64()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", '='); @@ -428,8 +437,13 @@ private static final class Alphabet { final int bytesPerChunk; private final byte[] decodabet; private final boolean[] validPadding; + private final boolean ignoreCase; Alphabet(String name, char[] chars) { + this(name, chars, decodabetFor(chars), /* ignoreCase= */ false); + } + + private Alphabet(String name, char[] chars, byte[] decodabet, boolean ignoreCase) { this.name = checkNotNull(name); this.chars = checkNotNull(chars); try { @@ -452,6 +466,17 @@ private static final class Alphabet { this.mask = chars.length - 1; + this.decodabet = decodabet; + + boolean[] validPadding = new boolean[charsPerChunk]; + for (int i = 0; i < bytesPerChunk; i++) { + validPadding[divide(i * 8, bitsPerChar, CEILING)] = true; + } + this.validPadding = validPadding; + this.ignoreCase = ignoreCase; + } + + private static byte[] decodabetFor(char[] chars) { byte[] decodabet = new byte[Ascii.MAX + 1]; Arrays.fill(decodabet, (byte) -1); for (int i = 0; i < chars.length; i++) { @@ -460,13 +485,33 @@ private static final class Alphabet { checkArgument(decodabet[c] == -1, "Duplicate character: %s", c); decodabet[c] = (byte) i; } - this.decodabet = decodabet; + return decodabet; + } - boolean[] validPadding = new boolean[charsPerChunk]; - for (int i = 0; i < bytesPerChunk; i++) { - validPadding[divide(i * 8, bitsPerChar, CEILING)] = true; + /** Returns an equivalent {@code Alphabet} except it ignores case. */ + Alphabet ignoreCase() { + if (ignoreCase) { + return this; } - this.validPadding = validPadding; + + // We can't use .clone() because of GWT. + byte[] newDecodabet = Arrays.copyOf(decodabet, decodabet.length); + for (int upper = 'A'; upper <= 'Z'; upper++) { + int lower = upper | 0x20; + byte decodeUpper = decodabet[upper]; + byte decodeLower = decodabet[lower]; + if (decodeUpper == -1) { + newDecodabet[upper] = decodeLower; + } else { + checkState( + decodeLower == -1, + "Can't ignoreCase() since '%s' and '%s' encode different values", + (char) upper, + (char) lower); + newDecodabet[lower] = decodeUpper; + } + } + return new Alphabet(name + ".ignoreCase()", chars, newDecodabet, /* ignoreCase= */ true); } char encode(int bits) { @@ -551,14 +596,14 @@ public String toString() { public boolean equals(@CheckForNull Object other) { if (other instanceof Alphabet) { Alphabet that = (Alphabet) other; - return Arrays.equals(this.chars, that.chars); + return this.ignoreCase == that.ignoreCase && Arrays.equals(this.chars, that.chars); } return false; } @Override public int hashCode() { - return Arrays.hashCode(chars); + return Arrays.hashCode(chars) + (ignoreCase ? 1231 : 1237); } } @@ -832,6 +877,7 @@ public BaseEncoding withSeparator(String separator, int afterEveryChars) { @LazyInit @CheckForNull private transient BaseEncoding upperCase; @LazyInit @CheckForNull private transient BaseEncoding lowerCase; + @LazyInit @CheckForNull private transient BaseEncoding ignoreCase; @Override public BaseEncoding upperCase() { @@ -853,6 +899,16 @@ public BaseEncoding lowerCase() { return result; } + @Override + public BaseEncoding ignoreCase() { + BaseEncoding result = ignoreCase; + if (result == null) { + Alphabet ignore = alphabet.ignoreCase(); + result = ignoreCase = (ignore == alphabet) ? this : newInstance(ignore, paddingChar); + } + return result; + } + BaseEncoding newInstance(Alphabet alphabet, @CheckForNull Character paddingChar) { return new StandardBaseEncoding(alphabet, paddingChar); } @@ -860,7 +916,7 @@ BaseEncoding newInstance(Alphabet alphabet, @CheckForNull Character paddingChar) @Override public String toString() { StringBuilder builder = new StringBuilder("BaseEncoding."); - builder.append(alphabet.toString()); + builder.append(alphabet); if (8 % alphabet.bitsPerChar != 0) { if (paddingChar == null) { builder.append(".omitPadding()"); @@ -1170,6 +1226,11 @@ public BaseEncoding lowerCase() { return delegate.lowerCase().withSeparator(separator, afterEveryChars); } + @Override + public BaseEncoding ignoreCase() { + return delegate.ignoreCase().withSeparator(separator, afterEveryChars); + } + @Override public String toString() { return delegate + ".withSeparator(\"" + separator + "\", " + afterEveryChars + ")";