Skip to content

Commit

Permalink
fix: dictionary decimal vector optimization (apache#705)
Browse files Browse the repository at this point in the history
## Which issue does this PR close?

Part of apache#679 and apache#670
Related apache#490

## Rationale for this change

For dictionary decimal vectors, it was unpacking even for Int and Long decimals that used more memory than necessary.

## What changes are included in this PR?

Unpack only for Decimal 128

## How are these changes tested?

Existing test

(cherry picked from commit c1b7c7d)
  • Loading branch information
kazuyukitanimura authored and huaxingao committed Aug 7, 2024
1 parent 48e87f8 commit dd79efe
Showing 1 changed file with 13 additions and 18 deletions.
31 changes: 13 additions & 18 deletions common/src/main/java/org/apache/comet/vector/CometDictionary.java
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ public class CometDictionary implements AutoCloseable {
public CometDictionary(CometPlainVector values) {
this.values = values;
this.numValues = values.numValues();
initialize();
}

public void setDictionaryVector(CometPlainVector values) {
Expand Down Expand Up @@ -83,6 +82,19 @@ public byte[] decodeToBinary(int index) {
case FIXEDSIZEBINARY:
return values.getBinary(index);
case DECIMAL:
if (binaries == null) {
// We only need to copy values for decimal 128 type as random access
// to the dictionary is not efficient for decimal (it needs to copy
// the value to a new byte array everytime).
ByteArrayWrapper[] binaries = new ByteArrayWrapper[numValues];
for (int i = 0; i < numValues; i++) {
// Need copying here since we re-use byte array for decimal
byte[] bytes = new byte[DECIMAL_BYTE_WIDTH];
bytes = values.copyBinaryDecimal(i, bytes);
binaries[i] = new ByteArrayWrapper(bytes);
}
this.binaries = binaries;
}
return binaries[index].bytes;
default:
throw new IllegalArgumentException(
Expand All @@ -99,23 +111,6 @@ public void close() {
values.close();
}

private void initialize() {
switch (values.getValueVector().getMinorType()) {
case DECIMAL:
// We only need to copy values for decimal type as random access
// to the dictionary is not efficient for decimal (it needs to copy
// the value to a new byte array everytime).
binaries = new ByteArrayWrapper[numValues];
for (int i = 0; i < numValues; i++) {
// Need copying here since we re-use byte array for decimal
byte[] bytes = new byte[DECIMAL_BYTE_WIDTH];
bytes = values.copyBinaryDecimal(i, bytes);
binaries[i] = new ByteArrayWrapper(bytes);
}
break;
}
}

private static class ByteArrayWrapper {
private final byte[] bytes;

Expand Down

0 comments on commit dd79efe

Please sign in to comment.