Skip to content

Commit

Permalink
LUCENE-10572: Add support for varhandles in native byte order (still …
Browse files Browse the repository at this point in the history
…randomized during tests) (#888)
  • Loading branch information
uschindler committed Feb 5, 2024
1 parent 3da32a2 commit 0f33d86
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 4 deletions.
4 changes: 4 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ Improvements

* GITHUB#13055: Make DEFAULT_STOP_TAGS in KoreanPartOfSpeechStopFilter immutable (Dmitry Cherniachenko)

* GITHUB#888: Use native byte order VarHandles to spare the CPU from byte swapping.
Tests run with a randomized byte order to ensure that the order does not affect the correctness
of the code. Native order was enabled for LZ4 compression. (Uwe Schindler)

Optimizations
---------------------

Expand Down
79 changes: 79 additions & 0 deletions lucene/core/src/java/org/apache/lucene/util/BitUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,29 @@ public final class BitUtil {

private BitUtil() {} // no instance

/**
 * Native byte order, as used by the {@code VH_NATIVE_*} handles of this class.
 *
 * <p>Warning: This constant equals {@link ByteOrder#nativeOrder()} only in production
 * environments. During testing it is randomized: when the system property {@code tests.seed} is
 * set, the order is derived from that property's hash code instead of from the platform. If you
 * need to communicate with native APIs (e.g., Java's Panama API), use {@link
 * ByteOrder#nativeOrder()}.
 */
public static final ByteOrder NATIVE_BYTE_ORDER = getNativeByteOrder();

/**
 * Computes the value of {@link #NATIVE_BYTE_ORDER}: the platform's byte order, unless the system
 * property {@code tests.seed} is set, in which case the order is chosen from the parity of the
 * property's hash code (even: little endian, odd: big endian).
 */
private static ByteOrder getNativeByteOrder() {
  String seed;
  try {
    seed = System.getProperty("tests.seed");
  } catch (@SuppressWarnings("unused") SecurityException ignored) {
    // Not permitted to read the property: behave exactly as if it were not set.
    seed = null;
  }
  if (seed == null) {
    return ByteOrder.nativeOrder();
  }
  // The same seed always yields the same order, so a run can be reproduced from its seed.
  final boolean evenHash = (seed.hashCode() & 1) == 0;
  if (evenHash) {
    return ByteOrder.LITTLE_ENDIAN;
  }
  return ByteOrder.BIG_ENDIAN;
}

/**
* A {@link VarHandle} to read/write little endian {@code short} from/to a byte array. Shape:
* {@code short vh.get(byte[] arr, int ofs)} and {@code void vh.set(byte[] arr, int ofs, short
Expand Down Expand Up @@ -65,6 +88,62 @@ private BitUtil() {} // no instance
public static final VarHandle VH_LE_DOUBLE =
MethodHandles.byteArrayViewVarHandle(double[].class, ByteOrder.LITTLE_ENDIAN);

/**
 * A {@link VarHandle} to read/write native endian {@code short} from/to a byte array. Shape:
 * {@code short vh.get(byte[] arr, int ofs)} and {@code void vh.set(byte[] arr, int ofs, short
 * val)}
 *
 * <p>Warning: This handle uses the platform's native order only in production environments;
 * during testing the order is randomized (see {@link #NATIVE_BYTE_ORDER}). If you need to
 * communicate with native APIs (e.g., Java's Panama API), use {@link ByteOrder#nativeOrder()}.
 */
public static final VarHandle VH_NATIVE_SHORT =
MethodHandles.byteArrayViewVarHandle(short[].class, NATIVE_BYTE_ORDER);

/**
 * A {@link VarHandle} to read/write native endian {@code int} from/to a byte array. Shape:
 * {@code int vh.get(byte[] arr, int ofs)} and {@code void vh.set(byte[] arr, int ofs, int val)}
 *
 * <p>Warning: This handle uses the platform's native order only in production environments;
 * during testing the order is randomized (see {@link #NATIVE_BYTE_ORDER}). If you need to
 * communicate with native APIs (e.g., Java's Panama API), use {@link ByteOrder#nativeOrder()}.
 */
public static final VarHandle VH_NATIVE_INT =
MethodHandles.byteArrayViewVarHandle(int[].class, NATIVE_BYTE_ORDER);

/**
 * A {@link VarHandle} to read/write native endian {@code long} from/to a byte array. Shape:
 * {@code long vh.get(byte[] arr, int ofs)} and {@code void vh.set(byte[] arr, int ofs, long
 * val)}
 *
 * <p>Warning: This handle uses the platform's native order only in production environments;
 * during testing the order is randomized (see {@link #NATIVE_BYTE_ORDER}). If you need to
 * communicate with native APIs (e.g., Java's Panama API), use {@link ByteOrder#nativeOrder()}.
 */
public static final VarHandle VH_NATIVE_LONG =
MethodHandles.byteArrayViewVarHandle(long[].class, NATIVE_BYTE_ORDER);

/**
 * A {@link VarHandle} to read/write native endian {@code float} from/to a byte array. Shape:
 * {@code float vh.get(byte[] arr, int ofs)} and {@code void vh.set(byte[] arr, int ofs, float
 * val)}
 *
 * <p>Warning: This handle uses the platform's native order only in production environments;
 * during testing the order is randomized (see {@link #NATIVE_BYTE_ORDER}). If you need to
 * communicate with native APIs (e.g., Java's Panama API), use {@link ByteOrder#nativeOrder()}.
 */
public static final VarHandle VH_NATIVE_FLOAT =
MethodHandles.byteArrayViewVarHandle(float[].class, NATIVE_BYTE_ORDER);

/**
 * A {@link VarHandle} to read/write native endian {@code double} from/to a byte array. Shape:
 * {@code double vh.get(byte[] arr, int ofs)} and {@code void vh.set(byte[] arr, int ofs, double
 * val)}
 *
 * <p>Warning: This handle uses the platform's native order only in production environments;
 * during testing the order is randomized (see {@link #NATIVE_BYTE_ORDER}). If you need to
 * communicate with native APIs (e.g., Java's Panama API), use {@link ByteOrder#nativeOrder()}.
 */
public static final VarHandle VH_NATIVE_DOUBLE =
MethodHandles.byteArrayViewVarHandle(double[].class, NATIVE_BYTE_ORDER);

/**
* A {@link VarHandle} to read/write big endian {@code short} from a byte array. Shape: {@code
* short vh.get(byte[] arr, int ofs)} and {@code void vh.set(byte[] arr, int ofs, short val)}
Expand Down
6 changes: 2 additions & 4 deletions lucene/core/src/java/org/apache/lucene/util/compress/LZ4.java
Original file line number Diff line number Diff line change
Expand Up @@ -68,10 +68,8 @@ private static int hashHC(int i) {
}

/**
 * Reads the four bytes of {@code buf} starting at offset {@code i} as a single {@code int}.
 *
 * <p>The bytes are combined in the byte order of {@code BitUtil.VH_NATIVE_INT}, so the numeric
 * value is not guaranteed to be the same on every platform.
 *
 * @param buf the array to read from
 * @param i the offset of the first of the four bytes
 * @return the {@code int} formed by the four bytes at {@code i}
 */
private static int readInt(byte[] buf, int i) {
  // According to LZ4's algorithm the endianness does not matter at all:
  return (int) BitUtil.VH_NATIVE_INT.get(buf, i);
}

private static int commonBytes(byte[] b, int o1, int o2, int limit) {
Expand Down

0 comments on commit 0f33d86

Please sign in to comment.