diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index dba0893a67f0..892648077507 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -212,6 +212,10 @@ Improvements * GITHUB#13055: Make DEFAULT_STOP_TAGS in KoreanPartOfSpeechStopFilter immutable (Dmitry Cherniachenko) +* GITHUB#888: Use native byte order varhandles to spare CPU's byte swapping. + Tests are running with random byte order to ensure that the order does not affect correctness + of code. Native order was enabled for LZ4 compression. (Uwe Schindler) + Optimizations --------------------- diff --git a/lucene/core/src/java/org/apache/lucene/util/BitUtil.java b/lucene/core/src/java/org/apache/lucene/util/BitUtil.java index 60f0914f5bf6..850cb2618093 100644 --- a/lucene/core/src/java/org/apache/lucene/util/BitUtil.java +++ b/lucene/core/src/java/org/apache/lucene/util/BitUtil.java @@ -29,6 +29,29 @@ public final class BitUtil { private BitUtil() {} // no instance + /** + * Native byte order. + * + *

<p>Warning: This constant is {@link ByteOrder#nativeOrder()} only in production environments; + * during testing we randomize it. If you need to communicate with native APIs (e.g., Java's + * Panama API), use {@link ByteOrder#nativeOrder()}. + */ + public static final ByteOrder NATIVE_BYTE_ORDER = getNativeByteOrder(); + + private static ByteOrder getNativeByteOrder() { + try { + var prop = System.getProperty("tests.seed"); + if (prop != null) { + return (prop.hashCode() % 2 == 0) ? ByteOrder.LITTLE_ENDIAN : ByteOrder.BIG_ENDIAN; + } + } catch ( + @SuppressWarnings("unused") + SecurityException se) { + // not allowed to read system properties: fall through to the platform's native order + } + return ByteOrder.nativeOrder(); + } + /** * A {@link VarHandle} to read/write little endian {@code short} from/to a byte array. Shape: * {@code short vh.get(byte[] arr, int ofs)} and {@code void vh.set(byte[] arr, int ofs, short @@ -65,6 +88,62 @@ private BitUtil() {} // no instance public static final VarHandle VH_LE_DOUBLE = MethodHandles.byteArrayViewVarHandle(double[].class, ByteOrder.LITTLE_ENDIAN); + /** + * A {@link VarHandle} to read/write native endian {@code short} from/to a byte array. Shape: + * {@code short vh.get(byte[] arr, int ofs)} and {@code void vh.set(byte[] arr, int ofs, short + * val)} + * + *

<p>Warning: This handle uses native order only in production environments; during testing we + * randomize it. If you need to communicate with native APIs (e.g., Java's Panama API), use {@link + * ByteOrder#nativeOrder()}. + */ + public static final VarHandle VH_NATIVE_SHORT = + MethodHandles.byteArrayViewVarHandle(short[].class, NATIVE_BYTE_ORDER); + + /** + * A {@link VarHandle} to read/write native endian {@code int} from/to a byte array. Shape: {@code + * int vh.get(byte[] arr, int ofs)} and {@code void vh.set(byte[] arr, int ofs, int val)} + * + *

<p>Warning: This handle uses native order only in production environments; during testing we + * randomize it. If you need to communicate with native APIs (e.g., Java's Panama API), use {@link + * ByteOrder#nativeOrder()}. + */ + public static final VarHandle VH_NATIVE_INT = + MethodHandles.byteArrayViewVarHandle(int[].class, NATIVE_BYTE_ORDER); + + /** + * A {@link VarHandle} to read/write native endian {@code long} from/to a byte array. Shape: {@code + * long vh.get(byte[] arr, int ofs)} and {@code void vh.set(byte[] arr, int ofs, long val)} + * + *

<p>Warning: This handle uses native order only in production environments; during testing we + * randomize it. If you need to communicate with native APIs (e.g., Java's Panama API), use {@link + * ByteOrder#nativeOrder()}. + */ + public static final VarHandle VH_NATIVE_LONG = + MethodHandles.byteArrayViewVarHandle(long[].class, NATIVE_BYTE_ORDER); + + /** + * A {@link VarHandle} to read/write native endian {@code float} from/to a byte array. Shape: {@code + * float vh.get(byte[] arr, int ofs)} and {@code void vh.set(byte[] arr, int ofs, float val)} + * + *

<p>Warning: This handle uses native order only in production environments; during testing we + * randomize it. If you need to communicate with native APIs (e.g., Java's Panama API), use {@link + * ByteOrder#nativeOrder()}. + */ + public static final VarHandle VH_NATIVE_FLOAT = + MethodHandles.byteArrayViewVarHandle(float[].class, NATIVE_BYTE_ORDER); + + /** + * A {@link VarHandle} to read/write native endian {@code double} from/to a byte array. Shape: {@code + * double vh.get(byte[] arr, int ofs)} and {@code void vh.set(byte[] arr, int ofs, double val)} + * + *

<p>Warning: This handle uses native order only in production environments; during testing we + * randomize it. If you need to communicate with native APIs (e.g., Java's Panama API), use {@link + * ByteOrder#nativeOrder()}. + */ + public static final VarHandle VH_NATIVE_DOUBLE = + MethodHandles.byteArrayViewVarHandle(double[].class, NATIVE_BYTE_ORDER); + /** * A {@link VarHandle} to read/write big endian {@code short} from a byte array. Shape: {@code * short vh.get(byte[] arr, int ofs)} and {@code void vh.set(byte[] arr, int ofs, short val)} diff --git a/lucene/core/src/java/org/apache/lucene/util/compress/LZ4.java b/lucene/core/src/java/org/apache/lucene/util/compress/LZ4.java index 67bbdc96ab2b..9033226a297e 100644 --- a/lucene/core/src/java/org/apache/lucene/util/compress/LZ4.java +++ b/lucene/core/src/java/org/apache/lucene/util/compress/LZ4.java @@ -68,10 +68,8 @@ private static int hashHC(int i) { } private static int readInt(byte[] buf, int i) { - // we hardcode LITTLE ENDIAN here as this is most performant on most platforms. - // According to LZ4's alogrithm the endianness does not matter at all, but we - // want to prevent indexes to differ just because of platform endianness! - return (int) BitUtil.VH_LE_INT.get(buf, i); + // According to LZ4's algorithm the endianness does not matter at all: + return (int) BitUtil.VH_NATIVE_INT.get(buf, i); } private static int commonBytes(byte[] b, int o1, int o2, int limit) {