Skip to content

Commit

Permalink
Lazily build hashtable for MapBlock
Browse files Browse the repository at this point in the history
Presto builds hashtables for MapBlocks eagerly when constructing the
MapBlock, even if they are not needed in the query. Building a hashtable
could take up to 40% CPU of the scan cost on a map column. This commit defers
the hashtable build to the time it's needed in SeekKey(). Note that we
only do this for the MapBlock, not the MapBlockBuilder, to avoid complex
synchronization problems. The MapBlockBuilder will always build the
hashtable. As a result, MergingPageOutput and PartitionOutputOperator
will still rebuild the hashtables when needed. The measurements show
that MergingPageOutput needs to build the hashtables for fewer than 10%
of pages. We will have a separate PR to improve PartitionOutput
and avoid rebuilding the pages so as to avoid hashtable rebuilding.

Simple select checksum queries show over 40% CPU gain:
Test                          | After  | Before | Improvement
select 2 map columns checksum | 11.69d | 20.06d | 42%
Select 1 map column checksum  |  9.67d | 17.73d | 45%
  • Loading branch information
Ying Su authored and yingsu00 committed Nov 8, 2018
1 parent 3c15bb1 commit 23de11f
Show file tree
Hide file tree
Showing 7 changed files with 296 additions and 130 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@

import com.facebook.presto.spi.type.Type;

import javax.annotation.Nullable;

import java.lang.invoke.MethodHandle;
import java.util.Arrays;
import java.util.Optional;
Expand All @@ -36,20 +38,23 @@ public abstract class AbstractMapBlock
protected final Type keyType;
protected final MethodHandle keyNativeHashCode;
protected final MethodHandle keyBlockNativeEquals;
protected final MethodHandle keyBlockHashCode;

public AbstractMapBlock(Type keyType, MethodHandle keyNativeHashCode, MethodHandle keyBlockNativeEquals)
public AbstractMapBlock(Type keyType, MethodHandle keyNativeHashCode, MethodHandle keyBlockNativeEquals, MethodHandle keyBlockHashCode)
{
this.keyType = requireNonNull(keyType, "keyType is null");
// keyNativeHashCode can only be null due to map block kill switch. deprecated.new-map-block
this.keyNativeHashCode = keyNativeHashCode;
// keyBlockNativeEquals can only be null due to map block kill switch. deprecated.new-map-block
this.keyBlockNativeEquals = keyBlockNativeEquals;
this.keyBlockHashCode = requireNonNull(keyBlockHashCode, "keyBlockHashCode is null");
}

protected abstract Block getRawKeyBlock();

protected abstract Block getRawValueBlock();

@Nullable
protected abstract int[] getHashTables();

/**
Expand All @@ -66,6 +71,8 @@ public AbstractMapBlock(Type keyType, MethodHandle keyNativeHashCode, MethodHand

protected abstract boolean[] getMapIsNull();

protected abstract void ensureHashTableLoaded();

int getOffset(int position)
{
return getOffsets()[position + getOffsetBase()];
Expand Down Expand Up @@ -108,21 +115,35 @@ public Block copyPositions(int[] positions, int offset, int length)
}

int[] hashTable = getHashTables();
int[] newHashTable = new int[newOffsets[newOffsets.length - 1] * HASH_MULTIPLIER];
int newHashIndex = 0;
for (int i = offset; i < offset + length; ++i) {
int position = positions[i];
int entriesStartOffset = getOffset(position);
int entriesEndOffset = getOffset(position + 1);
for (int hashIndex = entriesStartOffset * HASH_MULTIPLIER; hashIndex < entriesEndOffset * HASH_MULTIPLIER; hashIndex++) {
newHashTable[newHashIndex] = hashTable[hashIndex];
newHashIndex++;
int[] newHashTable = null;
if (hashTable != null) {
newHashTable = new int[newOffsets[newOffsets.length - 1] * HASH_MULTIPLIER];
int newHashIndex = 0;
for (int i = offset; i < offset + length; ++i) {
int position = positions[i];
int entriesStartOffset = getOffset(position);
int entriesEndOffset = getOffset(position + 1);
for (int hashIndex = entriesStartOffset * HASH_MULTIPLIER; hashIndex < entriesEndOffset * HASH_MULTIPLIER; hashIndex++) {
newHashTable[newHashIndex] = hashTable[hashIndex];
newHashIndex++;
}
}
}

Block newKeys = getRawKeyBlock().copyPositions(entriesPositions.elements(), 0, entriesPositions.size());
Block newValues = getRawValueBlock().copyPositions(entriesPositions.elements(), 0, entriesPositions.size());
return createMapBlockInternal(0, length, Optional.of(newMapIsNull), newOffsets, newKeys, newValues, newHashTable, keyType, keyBlockNativeEquals, keyNativeHashCode);
return createMapBlockInternal(
0,
length,
Optional.of(newMapIsNull),
newOffsets,
newKeys,
newValues,
Optional.ofNullable(newHashTable),
keyType,
keyBlockNativeEquals,
keyNativeHashCode,
keyBlockHashCode);
}

@Override
Expand All @@ -138,10 +159,11 @@ public Block getRegion(int position, int length)
getOffsets(),
getRawKeyBlock(),
getRawValueBlock(),
getHashTables(),
Optional.ofNullable(getHashTables()),
keyType,
keyBlockNativeEquals,
keyNativeHashCode);
keyNativeHashCode,
keyBlockHashCode);
}

@Override
Expand Down Expand Up @@ -174,7 +196,12 @@ public Block copyRegion(int position, int length)
int[] newOffsets = compactOffsets(getOffsets(), position + getOffsetBase(), length);
boolean[] mapIsNull = getMapIsNull();
boolean[] newMapIsNull = mapIsNull == null ? null : compactArray(mapIsNull, position + getOffsetBase(), length);
int[] newHashTable = compactArray(getHashTables(), startValueOffset * HASH_MULTIPLIER, (endValueOffset - startValueOffset) * HASH_MULTIPLIER);

int[] hashTables = getHashTables();
int[] newHashTable = null;
if (hashTables != null) {
newHashTable = compactArray(hashTables, startValueOffset * HASH_MULTIPLIER, (endValueOffset - startValueOffset) * HASH_MULTIPLIER);
}

if (newKeys == getRawKeyBlock() && newValues == getRawValueBlock() && newOffsets == getOffsets() && newMapIsNull == mapIsNull && newHashTable == getHashTables()) {
return this;
Expand All @@ -186,10 +213,11 @@ public Block copyRegion(int position, int length)
newOffsets,
newKeys,
newValues,
newHashTable,
Optional.ofNullable(newHashTable),
keyType,
keyBlockNativeEquals,
keyNativeHashCode);
keyNativeHashCode,
keyBlockHashCode);
}

@Override
Expand All @@ -205,12 +233,7 @@ public <T> T getObject(int position, Class<T> clazz)
return clazz.cast(new SingleMapBlock(
startEntryOffset * 2,
(endEntryOffset - startEntryOffset) * 2,
getRawKeyBlock(),
getRawValueBlock(),
getHashTables(),
keyType,
keyNativeHashCode,
keyBlockNativeEquals));
this));
}

@Override
Expand All @@ -230,7 +253,12 @@ public Block getSingleValueBlock(int position)
int valueLength = endValueOffset - startValueOffset;
Block newKeys = getRawKeyBlock().copyRegion(startValueOffset, valueLength);
Block newValues = getRawValueBlock().copyRegion(startValueOffset, valueLength);
int[] newHashTable = Arrays.copyOfRange(getHashTables(), startValueOffset * HASH_MULTIPLIER, endValueOffset * HASH_MULTIPLIER);

int[] hashTables = getHashTables();
int[] newHashTable = null;
if (hashTables != null) {
newHashTable = Arrays.copyOfRange(hashTables, startValueOffset * HASH_MULTIPLIER, endValueOffset * HASH_MULTIPLIER);
}

return createMapBlockInternal(
0,
Expand All @@ -239,10 +267,11 @@ public Block getSingleValueBlock(int position)
new int[] {0, valueLength},
newKeys,
newValues,
newHashTable,
Optional.ofNullable(newHashTable),
keyType,
keyBlockNativeEquals,
keyNativeHashCode);
keyNativeHashCode,
keyBlockHashCode);
}

@Override
Expand Down
108 changes: 80 additions & 28 deletions presto-spi/src/main/java/com/facebook/presto/spi/block/MapBlock.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@

import static com.facebook.presto.spi.block.MapBlockBuilder.buildHashTable;
import static io.airlift.slice.SizeOf.sizeOf;
import static io.airlift.slice.SizeOf.sizeOfIntArray;
import static java.lang.String.format;
import static java.util.Objects.requireNonNull;

Expand All @@ -42,7 +43,7 @@ public class MapBlock
private final int[] offsets;
private final Block keyBlock;
private final Block valueBlock;
private final int[] hashTables; // hash to location in map;
private volatile int[] hashTables; // hash to location in map. Writes to the field is protected by "this" monitor.

private volatile long sizeInBytes;
private final long retainedSizeInBytes;
Expand All @@ -68,20 +69,6 @@ public static MapBlock fromKeyValueBlock(
validateConstructorArguments(0, offsets.length - 1, mapIsNull.orElse(null), offsets, keyBlock, valueBlock, mapType.getKeyType(), keyBlockNativeEquals, keyNativeHashCode);

int mapCount = offsets.length - 1;
int elementCount = keyBlock.getPositionCount();
int[] hashTables = new int[elementCount * HASH_MULTIPLIER];
Arrays.fill(hashTables, -1);
for (int i = 0; i < mapCount; i++) {
int keyOffset = offsets[i];
int keyCount = offsets[i + 1] - keyOffset;
if (keyCount < 0) {
throw new IllegalArgumentException(format("Offset is not monotonically ascending. offsets[%s]=%s, offsets[%s]=%s", i, offsets[i], i + 1, offsets[i + 1]));
}
if (mapIsNull.isPresent() && mapIsNull.get()[i] && keyCount != 0) {
throw new IllegalArgumentException("A null map must have zero entries");
}
buildHashTable(keyBlock, keyOffset, keyCount, keyBlockHashCode, hashTables, keyOffset * HASH_MULTIPLIER, keyCount * HASH_MULTIPLIER);
}

return createMapBlockInternal(
0,
Expand All @@ -90,10 +77,11 @@ public static MapBlock fromKeyValueBlock(
offsets,
keyBlock,
valueBlock,
hashTables,
Optional.empty(),
mapType.getKeyType(),
keyBlockNativeEquals,
keyNativeHashCode);
keyNativeHashCode,
keyBlockHashCode);
}

/**
Expand All @@ -112,13 +100,25 @@ public static MapBlock createMapBlockInternal(
int[] offsets,
Block keyBlock,
Block valueBlock,
int[] hashTables,
Optional<int[]> hashTables,
Type keyType,
MethodHandle keyBlockNativeEquals,
MethodHandle keyNativeHashCode)
MethodHandle keyNativeHashCode,
MethodHandle keyBlockHashCode)
{
validateConstructorArguments(startOffset, positionCount, mapIsNull.orElse(null), offsets, keyBlock, valueBlock, keyType, keyBlockNativeEquals, keyNativeHashCode);
return new MapBlock(startOffset, positionCount, mapIsNull.orElse(null), offsets, keyBlock, valueBlock, hashTables, keyType, keyBlockNativeEquals, keyNativeHashCode);
return new MapBlock(
startOffset,
positionCount,
mapIsNull.orElse(null),
offsets,
keyBlock,
valueBlock,
hashTables.orElse(null),
keyType,
keyBlockNativeEquals,
keyNativeHashCode,
keyBlockHashCode);
}

private static void validateConstructorArguments(
Expand Down Expand Up @@ -171,15 +171,15 @@ private MapBlock(
int[] offsets,
Block keyBlock,
Block valueBlock,
int[] hashTables,
@Nullable int[] hashTables,
Type keyType,
MethodHandle keyBlockNativeEquals,
MethodHandle keyNativeHashCode)
MethodHandle keyNativeHashCode,
MethodHandle keyBlockHashCode)
{
super(keyType, keyNativeHashCode, keyBlockNativeEquals);
super(keyType, keyNativeHashCode, keyBlockNativeEquals, keyBlockHashCode);

requireNonNull(hashTables, "hashTables is null");
if (hashTables.length < keyBlock.getPositionCount() * HASH_MULTIPLIER) {
if (hashTables != null && hashTables.length < keyBlock.getPositionCount() * HASH_MULTIPLIER) {
throw new IllegalArgumentException(format("keyBlock/valueBlock size does not match hash table size: %s %s", keyBlock.getPositionCount(), hashTables.length));
}

Expand All @@ -192,7 +192,16 @@ private MapBlock(
this.hashTables = hashTables;

this.sizeInBytes = -1;
this.retainedSizeInBytes = INSTANCE_SIZE + keyBlock.getRetainedSizeInBytes() + valueBlock.getRetainedSizeInBytes() + sizeOf(offsets) + sizeOf(mapIsNull) + sizeOf(hashTables);

// We will add the hashtable size to the retained size even if it's not built yet. This could be overestimating
// but is necessary to avoid reliability issues. Currently the memory counting framework only pull the retained
// size once for each operator so updating in the middle of the processing would not work.
this.retainedSizeInBytes = INSTANCE_SIZE
+ keyBlock.getRetainedSizeInBytes()
+ valueBlock.getRetainedSizeInBytes()
+ sizeOf(offsets)
+ sizeOf(mapIsNull)
+ sizeOfIntArray(keyBlock.getPositionCount() * HASH_MULTIPLIER); // hashtable size if it was built
}

@Override
Expand Down Expand Up @@ -303,9 +312,52 @@ public Block getLoadedBlock()
offsets,
keyBlock,
loadedValueBlock,
hashTables,
Optional.ofNullable(hashTables),
keyType,
keyBlockNativeEquals,
keyNativeHashCode);
keyNativeHashCode,
keyBlockHashCode);
}

/**
 * Builds this block's hash tables on first use, if they have not been built yet.
 *
 * <p>The {@code hashTables} field is read once without locking; if it is still
 * {@code null}, it is re-checked and populated while holding this block's monitor,
 * so the table is built at most once per block instance.
 *
 * <p>Only the maps that belong to this block are hashed, i.e. positions
 * {@code getOffsetBase()} (inclusive) to {@code getOffsetBase() + getPositionCount()}
 * (exclusive) of the offsets array. The offset base must be applied because a block
 * may be a view over a larger offsets array that does not start at index 0
 * (positions are resolved as {@code getOffsets()[position + getOffsetBase()]}).
 *
 * @throws IllegalArgumentException if the offsets are not monotonically ascending,
 *         or if a map marked as null has a non-zero number of entries
 */
@Override
protected void ensureHashTableLoaded()
{
    if (this.hashTables != null) {
        return;
    }

    // This can only happen for MapBlock, not MapBlockBuilder because the latter always has non-null hashtables
    synchronized (this) {
        // Another thread may have built the table while we were waiting for the monitor.
        if (this.hashTables != null) {
            return;
        }

        int[] offsets = getOffsets();
        int offsetBase = getOffsetBase();
        int endPosition = offsetBase + getPositionCount();
        boolean[] mapIsNull = getMapIsNull();
        Block rawKeyBlock = getRawKeyBlock();

        // The table is sized for the whole raw key block so that slot ranges can be
        // addressed directly by key offset; every slot starts out as -1.
        int[] newHashTables = new int[rawKeyBlock.getPositionCount() * HASH_MULTIPLIER];
        Arrays.fill(newHashTables, -1);

        // offsets and mapIsNull are indexed by absolute position (offset base included).
        for (int i = offsetBase; i < endPosition; i++) {
            int keyOffset = offsets[i];
            int keyCount = offsets[i + 1] - keyOffset;
            if (keyCount < 0) {
                throw new IllegalArgumentException(format("Offset is not monotonically ascending. offsets[%s]=%s, offsets[%s]=%s", i, offsets[i], i + 1, offsets[i + 1]));
            }
            if (mapIsNull != null && mapIsNull[i] && keyCount != 0) {
                throw new IllegalArgumentException("A null map must have zero entries");
            }
            buildHashTable(
                    rawKeyBlock,
                    keyOffset,
                    keyCount,
                    keyBlockHashCode,
                    newHashTables,
                    keyOffset * HASH_MULTIPLIER,
                    keyCount * HASH_MULTIPLIER);
        }

        // Publish only after the table is fully populated; the field is volatile.
        this.hashTables = newHashTables;
    }
}
}
Loading

0 comments on commit 23de11f

Please sign in to comment.