Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow Uid#decodeId to decode from a byte array slice #26987

Merged
merged 2 commits into from
Oct 12, 2017
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 52 additions & 44 deletions core/src/main/java/org/elasticsearch/index/mapper/Uid.java
Original file line number Diff line number Diff line change
Expand Up @@ -135,36 +135,36 @@ static boolean isURLBase64WithoutPadding(String id) {
// 'xxx=' and 'xxx' could be considered the same id
final int length = id.length();
switch (length & 0x03) {
case 0:
break;
case 1:
return false;
case 2:
// the last 2 symbols (12 bits) are encoding 1 byte (8 bits)
// so the last symbol only actually uses 8-6=2 bits and can only take 4 values
char last = id.charAt(length - 1);
if (last != 'A' && last != 'Q' && last != 'g' && last != 'w') {
case 0:
break;
case 1:
return false;
}
break;
case 3:
// The last 3 symbols (18 bits) are encoding 2 bytes (16 bits)
// so the last symbol only actually uses 16-12=4 bits and can only take 16 values
last = id.charAt(length - 1);
if (last != 'A' && last != 'E' && last != 'I' && last != 'M' && last != 'Q'&& last != 'U'&& last != 'Y'
case 2:
// the last 2 symbols (12 bits) are encoding 1 byte (8 bits)
// so the last symbol only actually uses 8-6=2 bits and can only take 4 values
char last = id.charAt(length - 1);
if (last != 'A' && last != 'Q' && last != 'g' && last != 'w') {
return false;
}
break;
case 3:
// The last 3 symbols (18 bits) are encoding 2 bytes (16 bits)
// so the last symbol only actually uses 16-12=4 bits and can only take 16 values
last = id.charAt(length - 1);
if (last != 'A' && last != 'E' && last != 'I' && last != 'M' && last != 'Q'&& last != 'U'&& last != 'Y'
&& last != 'c'&& last != 'g'&& last != 'k' && last != 'o' && last != 's' && last != 'w'
&& last != '0' && last != '4' && last != '8') {
return false;
}
break;
default:
// number & 0x03 is always in [0,3]
throw new AssertionError("Impossible case");
return false;
}
break;
default:
// number & 0x03 is always in [0,3]
throw new AssertionError("Impossible case");
}
for (int i = 0; i < length; ++i) {
final char c = id.charAt(i);
final boolean allowed =
(c >= '0' && c <= '9') ||
(c >= '0' && c <= '9') ||
(c >= 'A' && c <= 'Z') ||
(c >= 'a' && c <= 'z') ||
c == '-' || c == '_';
Expand Down Expand Up @@ -244,16 +244,16 @@ public static BytesRef encodeId(String id) {
}
}

private static String decodeNumericId(byte[] idBytes) {
assert Byte.toUnsignedInt(idBytes[0]) == NUMERIC;
int length = (idBytes.length - 1) * 2;
private static String decodeNumericId(byte[] idBytes, int offset, int len) {
assert Byte.toUnsignedInt(idBytes[offset]) == NUMERIC;
int length = (len - 1) * 2;
char[] chars = new char[length];
for (int i = 1; i < idBytes.length; ++i) {
final int b = Byte.toUnsignedInt(idBytes[i]);
for (int i = 1; i < len; ++i) {
final int b = Byte.toUnsignedInt(idBytes[offset + i]);
final int b1 = (b >>> 4);
final int b2 = b & 0x0f;
chars[(i - 1) * 2] = (char) (b1 + '0');
if (i == idBytes.length - 1 && b2 == 0x0f) {
if (i == len - 1 && b2 == 0x0f) {
length--;
break;
}
Expand All @@ -262,33 +262,41 @@ private static String decodeNumericId(byte[] idBytes) {
return new String(chars, 0, length);
}

private static String decodeUtf8Id(byte[] idBytes) {
assert Byte.toUnsignedInt(idBytes[0]) == UTF8;
return new BytesRef(idBytes, 1, idBytes.length - 1).utf8ToString();
private static String decodeUtf8Id(byte[] idBytes, int offset, int length) {
assert Byte.toUnsignedInt(idBytes[offset]) == UTF8;
return new BytesRef(idBytes, offset + 1, length - 1).utf8ToString();
}

private static String decodeBase64Id(byte[] idBytes) {
assert Byte.toUnsignedInt(idBytes[0]) <= BASE64_ESCAPE;
if (Byte.toUnsignedInt(idBytes[0]) == BASE64_ESCAPE) {
idBytes = Arrays.copyOfRange(idBytes, 1, idBytes.length);
private static String decodeBase64Id(byte[] idBytes, int offset, int length) {
assert Byte.toUnsignedInt(idBytes[offset]) <= BASE64_ESCAPE;
if (Byte.toUnsignedInt(idBytes[offset]) == BASE64_ESCAPE) {
idBytes = Arrays.copyOfRange(idBytes, offset + 1, offset + length);
} else if ((idBytes.length == length && offset == 0) == false) { // no need to copy if it's not a slice
idBytes = Arrays.copyOfRange(idBytes, offset, offset + length);
}
return Base64.getUrlEncoder().withoutPadding().encodeToString(idBytes);
}

/** Decode an indexed id back to its original form.
* @see #encodeId */
public static String decodeId(byte[] idBytes) {
if (idBytes.length == 0) {
return decodeId(idBytes, 0, idBytes.length);
}

/** Decode an indexed id back to its original form, reading the {@code length} bytes of {@code idBytes} that start at {@code offset}.
* @see #encodeId */
public static String decodeId(byte[] idBytes, int offset, int length) {
if (length == 0) {
throw new IllegalArgumentException("Ids can't be empty");
}
final int magicChar = Byte.toUnsignedInt(idBytes[0]);
final int magicChar = Byte.toUnsignedInt(idBytes[offset]);
switch (magicChar) {
case NUMERIC:
return decodeNumericId(idBytes);
case UTF8:
return decodeUtf8Id(idBytes);
default:
return decodeBase64Id(idBytes);
case NUMERIC:
return decodeNumericId(idBytes, offset, length);
case UTF8:
return decodeUtf8Id(idBytes, offset, length);
default:
return decodeBase64Id(idBytes, offset, length);
}
}
}
23 changes: 20 additions & 3 deletions core/src/test/java/org/elasticsearch/index/mapper/UidTests.java
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ public void testEncodeUTF8Ids() {
for (int iter = 0; iter < iters; ++iter) {
final String id = TestUtil.randomRealisticUnicodeString(random(), 1, 10);
BytesRef encoded = Uid.encodeId(id);
assertEquals(id, Uid.decodeId(Arrays.copyOfRange(encoded.bytes, encoded.offset, encoded.offset + encoded.length)));
assertEquals(id, doDecodeId(encoded));
assertTrue(encoded.length <= 1 + new BytesRef(id).length);
}
}
Expand All @@ -93,7 +93,7 @@ public void testEncodeNumericIds() {
id = "0" + id;
}
BytesRef encoded = Uid.encodeId(id);
assertEquals(id, Uid.decodeId(Arrays.copyOfRange(encoded.bytes, encoded.offset, encoded.offset + encoded.length)));
assertEquals(id, doDecodeId(encoded));
assertEquals(1 + (id.length() + 1) / 2, encoded.length);
}
}
Expand All @@ -105,9 +105,26 @@ public void testEncodeBase64Ids() {
random().nextBytes(binaryId);
final String id = Base64.getUrlEncoder().withoutPadding().encodeToString(binaryId);
BytesRef encoded = Uid.encodeId(id);
assertEquals(id, Uid.decodeId(Arrays.copyOfRange(encoded.bytes, encoded.offset, encoded.offset + encoded.length)));
assertEquals(id, doDecodeId(encoded));
assertTrue(encoded.length <= 1 + binaryId.length);
}
}

/**
 * Decodes {@code encoded} through a randomly selected {@code Uid.decodeId} entry point:
 * either the single-argument overload fed with an exact-size copy of the slice, or the
 * (bytes, offset, length) overload fed with the slice itself. In the latter case the slice
 * is sometimes first relocated to a non-zero offset inside a larger array.
 */
private static String doDecodeId(BytesRef encoded) {
    // First draw: decode from a standalone array that holds exactly the encoded bytes.
    if (randomBoolean()) {
        final byte[] exactBytes = Arrays.copyOfRange(encoded.bytes, encoded.offset, encoded.offset + encoded.length);
        return Uid.decodeId(exactBytes);
    }

    BytesRef slice = encoded;
    // Second draw: move the bytes into a larger backing array at an offset of at least 1.
    if (randomBoolean()) {
        final BytesRef relocated = new BytesRef(randomIntBetween(encoded.length + 1, encoded.length + 100));
        relocated.offset = randomIntBetween(1, relocated.bytes.length - encoded.length);
        relocated.length = encoded.length;
        System.arraycopy(encoded.bytes, encoded.offset, relocated.bytes, relocated.offset, encoded.length);

        // Sanity check: the relocated slice must carry the same bytes as the source slice.
        final byte[] expectedBytes = Arrays.copyOfRange(encoded.bytes, encoded.offset, encoded.offset + encoded.length);
        final byte[] actualBytes = Arrays.copyOfRange(relocated.bytes, relocated.offset, relocated.offset + relocated.length);
        assertArrayEquals(expectedBytes, actualBytes);

        slice = relocated;
    }
    return Uid.decodeId(slice.bytes, slice.offset, slice.length);
}
}