From c1b626c0636821f4d7c085895359489e7dfa330f Mon Sep 17 00:00:00 2001 From: Uwe Schindler Date: Sat, 14 May 2022 01:29:45 +0200 Subject: [PATCH] Remove vInt-like encoding in ByteBlockPool and BytesRefHash and switch to native byte order for the length --- .../org/apache/lucene/util/ByteBlockPool.java | 11 +---- .../org/apache/lucene/util/BytesRefHash.java | 46 ++++--------------- 2 files changed, 10 insertions(+), 47 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java b/lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java index 5bf5ffc9a79a..46703c97e0ff 100644 --- a/lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java +++ b/lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java @@ -301,15 +301,8 @@ void setBytesRef(BytesRefBuilder builder, BytesRef result, long offset, int leng public void setBytesRef(BytesRef term, int textStart) { final byte[] bytes = term.bytes = buffers[textStart >> BYTE_BLOCK_SHIFT]; int pos = textStart & BYTE_BLOCK_MASK; - if ((bytes[pos] & 0x80) == 0) { - // length is 1 byte - term.length = bytes[pos]; - term.offset = pos + 1; - } else { - // length is 2 bytes - term.length = ((short) BitUtil.VH_BE_SHORT.get(bytes, pos)) & 0x7FFF; - term.offset = pos + 2; - } + term.length = ((short) BitUtil.VH_NATIVE_SHORT.get(bytes, pos)) & 0x7FFF; + term.offset = pos + 2; assert term.length >= 0; } diff --git a/lucene/core/src/java/org/apache/lucene/util/BytesRefHash.java b/lucene/core/src/java/org/apache/lucene/util/BytesRefHash.java index f70f79e8a4d3..732c09d6e686 100644 --- a/lucene/core/src/java/org/apache/lucene/util/BytesRefHash.java +++ b/lucene/core/src/java/org/apache/lucene/util/BytesRefHash.java @@ -173,17 +173,8 @@ private boolean equals(int id, BytesRef b) { final int textStart = bytesStart[id]; final byte[] bytes = pool.buffers[textStart >> BYTE_BLOCK_SHIFT]; int pos = textStart & BYTE_BLOCK_MASK; - final int length; - final int offset; - if ((bytes[pos] & 0x80) == 0) { - // length is 1 byte - length = bytes[pos]; - offset = pos + 1; - } else { - // length is 2 bytes - length = ((short) BitUtil.VH_BE_SHORT.get(bytes, pos)) & 0x7FFF; - offset = pos + 2; - } + final int length = ((short) BitUtil.VH_NATIVE_SHORT.get(bytes, pos)) & 0x7FFF; + final int offset = pos + 2; return Arrays.equals(bytes, offset, offset + length, b.bytes, b.offset, b.offset + b.length); } @@ -270,22 +261,10 @@ public int add(BytesRef bytes) { bytesStart[e] = bufferUpto + pool.byteOffset; - // We first encode the length, followed by the - // bytes. Length is encoded as vInt, but will consume - // 1 or 2 bytes at most (we reject too-long terms, - // above). - if (length < 128) { - // 1 byte to store length - buffer[bufferUpto] = (byte) length; - pool.byteUpto += length + 1; - assert length >= 0 : "Length must be positive: " + length; - System.arraycopy(bytes.bytes, bytes.offset, buffer, bufferUpto + 1, length); - } else { - // 2 byte to store length - BitUtil.VH_BE_SHORT.set(buffer, bufferUpto, (short) (length | 0x8000)); - pool.byteUpto += length + 2; - System.arraycopy(bytes.bytes, bytes.offset, buffer, bufferUpto + 2, length); - } + BitUtil.VH_NATIVE_SHORT.set(buffer, bufferUpto, (short) (length | 0x8000)); + pool.byteUpto += length + 2; + System.arraycopy(bytes.bytes, bytes.offset, buffer, bufferUpto + 2, length); + assert ids[hashPos] == -1; ids[hashPos] = e; @@ -384,17 +363,8 @@ private void rehash(final int newSize, boolean hashOnData) { final int off = bytesStart[e0]; final int start = off & BYTE_BLOCK_MASK; final byte[] bytes = pool.buffers[off >> BYTE_BLOCK_SHIFT]; - final int len; - int pos; - if ((bytes[start] & 0x80) == 0) { - // length is 1 byte - len = bytes[start]; - pos = start + 1; - } else { - len = ((short) BitUtil.VH_BE_SHORT.get(bytes, start)) & 0x7FFF; - pos = start + 2; - } - code = doHash(bytes, pos, len); + final int len = ((short) BitUtil.VH_NATIVE_SHORT.get(bytes, start)) & 0x7FFF; + code = doHash(bytes, start + 2, len); } else { code = bytesStart[e0]; }