Skip to content

Commit

Permalink
Improve performance of parquet dictionary to domain conversion
Browse files Browse the repository at this point in the history
Improved the logic for constructing a domain from dictionary values in
TupleDomainParquetPredicate by providing a builder in SortedRangeSet
that caches the comparison operator and reuses it for Range construction.

Benchmark                                                  Mode  Cnt  Score Before      Score After      Units
BenchmarkTupleDomainParquetPredicate.domainFromDictionary  avgt   10  354.194 ± 29.631  246.769 ± 12.390 ms/op
  • Loading branch information
raunaqmorarka committed Dec 2, 2022
1 parent 0ef819d commit 56936ba
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 28 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -981,6 +981,44 @@ private String formatRanges(ConnectorSession session, int limit)
.collect(joining(", ", "{", "}"));
}

/**
 * Creates a {@link Builder} that collects ranges of the given type.
 *
 * @param type the type shared by every range added to the builder; must not be null
 * @param expectedSize initial capacity of the builder's internal range list
 * @return a new, empty builder
 */
public static Builder builder(Type type, int expectedSize)
{
    SortedRangeSet.Builder rangeSetBuilder = new SortedRangeSet.Builder(type, expectedSize);
    return rangeSetBuilder;
}

/**
 * Collects {@link Range}s of a single {@link Type} and hands them to
 * {@link SortedRangeSet#of(List)} when {@link #build()} is called.
 * <p>
 * The range comparison operator is looked up once, when the builder is created,
 * and passed to every {@link Range} the builder constructs.
 */
public static class Builder
{
    private final Type type;
    private final MethodHandle rangeComparisonOperator;
    private final List<Range> ranges;

    private Builder(Type type, int expectedSize)
    {
        this.type = requireNonNull(type, "type is null");
        // Look the comparison operator up a single time here rather than once per range,
        // so that adding ranges does not hit the TypeOperators cache
        this.rangeComparisonOperator = Range.getComparisonOperator(type);
        this.ranges = new ArrayList<>(expectedSize);
    }

    /**
     * Adds a range whose low and high bounds are both marked inclusive.
     *
     * @param lowValue low bound; must not be null
     * @param highValue high bound; must not be null
     * @return this builder, for chaining
     */
    public Builder addRangeInclusive(Object lowValue, Object highValue)
    {
        Optional<Object> lowBound = Optional.of(lowValue);
        Optional<Object> highBound = Optional.of(highValue);
        Range inclusiveRange = new Range(type, true, lowBound, true, highBound, rangeComparisonOperator);
        ranges.add(inclusiveRange);
        return this;
    }

    /**
     * Adds a range whose low and high bounds are the same value, both marked inclusive.
     *
     * @param value the single value of the range; must not be null
     * @return this builder, for chaining
     */
    public Builder addValue(Object value)
    {
        // One Optional instance serves as both the low and the high bound
        Optional<Object> bound = Optional.of(value);
        Range singleValueRange = new Range(type, true, bound, true, bound, rangeComparisonOperator);
        ranges.add(singleValueRange);
        return this;
    }

    /**
     * Builds a {@link SortedRangeSet} from the ranges added so far by delegating to
     * {@link SortedRangeSet#of(List)}.
     *
     * @return the resulting range set
     */
    public SortedRangeSet build()
    {
        return SortedRangeSet.of(ranges);
    }
}

static SortedRangeSet buildFromUnsortedRanges(Type type, Collection<Range> unsortedRanges)
{
requireNonNull(type, "type is null");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
import io.trino.parquet.dictionary.Dictionary;
import io.trino.plugin.base.type.TrinoTimestampEncoder;
import io.trino.spi.predicate.Domain;
import io.trino.spi.predicate.Range;
import io.trino.spi.predicate.SortedRangeSet;
import io.trino.spi.predicate.TupleDomain;
import io.trino.spi.predicate.ValueSet;
import io.trino.spi.type.DecimalType;
Expand Down Expand Up @@ -269,52 +269,52 @@ private static Domain getDomain(
}

if (type.equals(BIGINT) || type.equals(INTEGER) || type.equals(DATE) || type.equals(SMALLINT) || type.equals(TINYINT)) {
List<Range> ranges = new ArrayList<>(minimums.size());
SortedRangeSet.Builder rangesBuilder = SortedRangeSet.builder(type, minimums.size());
for (int i = 0; i < minimums.size(); i++) {
long min = asLong(minimums.get(i));
long max = asLong(maximums.get(i));
if (isStatisticsOverflow(type, min, max)) {
return Domain.create(ValueSet.all(type), hasNullValue);
}

ranges.add(Range.range(type, min, true, max, true));
rangesBuilder.addRangeInclusive(min, max);
}

return Domain.create(ValueSet.ofRanges(ranges), hasNullValue);
return Domain.create(rangesBuilder.build(), hasNullValue);
}

if (type instanceof DecimalType) {
DecimalType decimalType = (DecimalType) type;
List<Range> ranges = new ArrayList<>(minimums.size());
SortedRangeSet.Builder rangesBuilder = SortedRangeSet.builder(type, minimums.size());
if (decimalType.isShort()) {
for (int i = 0; i < minimums.size(); i++) {
Object min = minimums.get(i);
Object max = maximums.get(i);

long minValue = min instanceof Binary ? getShortDecimalValue(((Binary) min).getBytes()) : asLong(min);
long maxValue = min instanceof Binary ? getShortDecimalValue(((Binary) max).getBytes()) : asLong(max);
long maxValue = max instanceof Binary ? getShortDecimalValue(((Binary) max).getBytes()) : asLong(max);

if (isStatisticsOverflow(type, minValue, maxValue)) {
return Domain.create(ValueSet.all(type), hasNullValue);
}

ranges.add(Range.range(type, minValue, true, maxValue, true));
rangesBuilder.addRangeInclusive(minValue, maxValue);
}
}
else {
for (int i = 0; i < minimums.size(); i++) {
Int128 min = Int128.fromBigEndian(((Binary) minimums.get(i)).getBytes());
Int128 max = Int128.fromBigEndian(((Binary) maximums.get(i)).getBytes());

ranges.add(Range.range(type, min, true, max, true));
rangesBuilder.addRangeInclusive(min, max);
}
}

return Domain.create(ValueSet.ofRanges(ranges), hasNullValue);
return Domain.create(rangesBuilder.build(), hasNullValue);
}

if (type.equals(REAL)) {
List<Range> ranges = new ArrayList<>(minimums.size());
SortedRangeSet.Builder rangesBuilder = SortedRangeSet.builder(type, minimums.size());
for (int i = 0; i < minimums.size(); i++) {
Float min = (Float) minimums.get(i);
Float max = (Float) maximums.get(i);
Expand All @@ -323,13 +323,13 @@ private static Domain getDomain(
return Domain.create(ValueSet.all(type), hasNullValue);
}

ranges.add(Range.range(type, (long) floatToRawIntBits(min), true, (long) floatToRawIntBits(max), true));
rangesBuilder.addRangeInclusive((long) floatToRawIntBits(min), (long) floatToRawIntBits(max));
}
return Domain.create(ValueSet.ofRanges(ranges), hasNullValue);
return Domain.create(rangesBuilder.build(), hasNullValue);
}

if (type.equals(DOUBLE)) {
List<Range> ranges = new ArrayList<>(minimums.size());
SortedRangeSet.Builder rangesBuilder = SortedRangeSet.builder(type, minimums.size());
for (int i = 0; i < minimums.size(); i++) {
Double min = (Double) minimums.get(i);
Double max = (Double) maximums.get(i);
Expand All @@ -338,25 +338,25 @@ private static Domain getDomain(
return Domain.create(ValueSet.all(type), hasNullValue);
}

ranges.add(Range.range(type, min, true, max, true));
rangesBuilder.addRangeInclusive(min, max);
}
return Domain.create(ValueSet.ofRanges(ranges), hasNullValue);
return Domain.create(rangesBuilder.build(), hasNullValue);
}

if (type instanceof VarcharType) {
List<Range> ranges = new ArrayList<>(minimums.size());
SortedRangeSet.Builder rangesBuilder = SortedRangeSet.builder(type, minimums.size());
for (int i = 0; i < minimums.size(); i++) {
Slice min = Slices.wrappedBuffer(((Binary) minimums.get(i)).toByteBuffer());
Slice max = Slices.wrappedBuffer(((Binary) maximums.get(i)).toByteBuffer());
ranges.add(Range.range(type, min, true, max, true));
rangesBuilder.addRangeInclusive(min, max);
}
return Domain.create(ValueSet.ofRanges(ranges), hasNullValue);
return Domain.create(rangesBuilder.build(), hasNullValue);
}

if (type instanceof TimestampType) {
if (column.getPrimitiveType().getPrimitiveTypeName().equals(INT96)) {
TrinoTimestampEncoder<?> timestampEncoder = createTimestampEncoder((TimestampType) type, timeZone);
List<Object> values = new ArrayList<>(minimums.size());
SortedRangeSet.Builder rangesBuilder = SortedRangeSet.builder(type, minimums.size());
for (int i = 0; i < minimums.size(); i++) {
Object min = minimums.get(i);
Object max = maximums.get(i);
Expand All @@ -369,9 +369,9 @@ private static Domain getDomain(
return Domain.create(ValueSet.all(type), hasNullValue);
}

values.add(timestampEncoder.getTimestamp(decodeInt96Timestamp((Binary) min)));
rangesBuilder.addValue(timestampEncoder.getTimestamp(decodeInt96Timestamp((Binary) min)));
}
return Domain.multipleValues(type, values, hasNullValue);
return Domain.create(rangesBuilder.build(), hasNullValue);
}
if (column.getPrimitiveType().getPrimitiveTypeName().equals(INT64)) {
LogicalTypeAnnotation logicalTypeAnnotation = column.getPrimitiveType().getLogicalTypeAnnotation();
Expand All @@ -387,19 +387,16 @@ private static Domain getDomain(
}
TrinoTimestampEncoder<?> timestampEncoder = createTimestampEncoder((TimestampType) type, DateTimeZone.UTC);

List<Range> ranges = new ArrayList<>(minimums.size());
SortedRangeSet.Builder rangesBuilder = SortedRangeSet.builder(type, minimums.size());
for (int i = 0; i < minimums.size(); i++) {
long min = (long) minimums.get(i);
long max = (long) maximums.get(i);

ranges.add(Range.range(
type,
rangesBuilder.addRangeInclusive(
timestampEncoder.getTimestamp(decodeInt64Timestamp(min, timestampTypeAnnotation.getUnit())),
true,
timestampEncoder.getTimestamp(decodeInt64Timestamp(max, timestampTypeAnnotation.getUnit())),
true));
timestampEncoder.getTimestamp(decodeInt64Timestamp(max, timestampTypeAnnotation.getUnit())));
}
return Domain.create(ValueSet.ofRanges(ranges), hasNullValue);
return Domain.create(rangesBuilder.build(), hasNullValue);
}
}

Expand Down

0 comments on commit 56936ba

Please sign in to comment.