diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java index 02a863b2bb498..c867e5b3cb4c2 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java @@ -298,6 +298,10 @@ public UnsafeMapData getMap(int ordinal) { return map; } + // This `hashCode` computation could consume much processor time for large data. + // If the computation becomes a bottleneck, we can use a light-weight logic; the first fixed bytes + // are used to compute `hashCode` (See `Vector.hashCode`). + // The same issue exists in `UnsafeMapData.hashCode`. @Override public int hashCode() { return Murmur3_x86_32.hashUnsafeBytes(baseObject, baseOffset, sizeInBytes, 42);