Skip to content

Commit

Permalink
Add metadata classes for map statistics
Browse files Browse the repository at this point in the history
  • Loading branch information
sdruzkin authored and ARUNACHALAM THIRUPATHI committed Jun 25, 2022
1 parent 66079f8 commit e53d846
Show file tree
Hide file tree
Showing 9 changed files with 613 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -485,6 +485,7 @@ private static ColumnStatistics toColumnStatistics(HiveWriterVersion hiveWriterV
null,
null,
statistics.hasBinaryStatistics() ? toBinaryStatistics(statistics.getBinaryStatistics()) : null,
null,
bloomFilter);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,7 @@ private static ColumnStatistics toColumnStatistics(HiveWriterVersion hiveWriterV
statistics.hasDateStatistics() ? toDateStatistics(hiveWriterVersion, statistics.getDateStatistics(), isRowGroup) : null,
statistics.hasDecimalStatistics() ? toDecimalStatistics(statistics.getDecimalStatistics()) : null,
statistics.hasBinaryStatistics() ? toBinaryStatistics(statistics.getBinaryStatistics()) : null,
null,
bloomFilter);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import static com.facebook.presto.orc.metadata.statistics.DoubleStatisticsBuilder.mergeDoubleStatistics;
import static com.facebook.presto.orc.metadata.statistics.IntegerStatisticsBuilder.mergeIntegerStatistics;
import static com.facebook.presto.orc.metadata.statistics.LongDecimalStatisticsBuilder.mergeDecimalStatistics;
import static com.facebook.presto.orc.metadata.statistics.MapColumnStatisticsBuilder.mergeMapStatistics;
import static com.facebook.presto.orc.metadata.statistics.StringStatisticsBuilder.mergeStringStatistics;
import static com.google.common.base.MoreObjects.toStringHelper;

Expand Down Expand Up @@ -107,6 +108,11 @@ public BinaryStatistics getBinaryStatistics()
return null;
}

/**
 * Returns the map statistics attached to this column statistics object.
 *
 * <p>This base implementation carries no map statistics and always returns
 * {@code null}; {@code MapColumnStatistics} overrides it to return its own
 * {@code MapStatistics}. Callers must therefore null-check the result.
 *
 * @return the map statistics, or {@code null} when none are available
 */
public MapStatistics getMapStatistics()
{
return null;
}

public HiveBloomFilter getBloomFilter()
{
return bloomFilter;
Expand Down Expand Up @@ -196,6 +202,7 @@ public static ColumnStatistics mergeColumnStatistics(List<ColumnStatistics> stat
mergeDateStatistics(stats).orElse(null),
mergeDecimalStatistics(stats).orElse(null),
mergeBinaryStatistics(stats).orElse(null),
mergeMapStatistics(stats).orElse(null),
null);
}

Expand All @@ -208,6 +215,7 @@ public static ColumnStatistics createColumnStatistics(
DateStatistics dateStatistics,
DecimalStatistics decimalStatistics,
BinaryStatistics binaryStatistics,
MapStatistics mapStatistics,
HiveBloomFilter bloomFilter)
{
if (booleanStatistics != null) {
Expand Down Expand Up @@ -238,6 +246,10 @@ public static ColumnStatistics createColumnStatistics(
return new BinaryColumnStatistics(numberOfValues, bloomFilter, binaryStatistics);
}

if (mapStatistics != null) {
return new MapColumnStatistics(numberOfValues, bloomFilter, mapStatistics);
}

return new ColumnStatistics(numberOfValues, bloomFilter);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.orc.metadata.statistics;

import com.google.common.base.MoreObjects.ToStringHelper;
import org.openjdk.jol.info.ClassLayout;

import java.util.Objects;

import static java.util.Objects.requireNonNull;

/**
 * {@link ColumnStatistics} specialization that additionally carries a
 * {@link MapStatistics} instance, i.e. a list of per-key statistics entries.
 */
public class MapColumnStatistics
        extends ColumnStatistics
{
    private static final int INSTANCE_SIZE = ClassLayout.parseClass(MapColumnStatistics.class).instanceSize();
    private final MapStatistics mapStatistics;

    /**
     * @param numberOfValues forwarded unchanged to the {@link ColumnStatistics} constructor
     * @param bloomFilter forwarded unchanged to the {@link ColumnStatistics} constructor
     * @param mapStatistics the map statistics to expose; must not be {@code null}
     * @throws NullPointerException if {@code mapStatistics} is {@code null}
     */
    public MapColumnStatistics(Long numberOfValues, HiveBloomFilter bloomFilter, MapStatistics mapStatistics)
    {
        super(numberOfValues, bloomFilter);
        this.mapStatistics = requireNonNull(mapStatistics, "mapStatistics is null");
    }

    /**
     * @return the map statistics supplied at construction; never {@code null}
     */
    @Override
    public MapStatistics getMapStatistics()
    {
        return mapStatistics;
    }

    /**
     * @return the sum of {@code getTotalValueSizeInBytes()} over the column
     * statistics of every entry in the map statistics (0 when there are no entries)
     */
    @Override
    public long getTotalValueSizeInBytes()
    {
        long size = 0;
        for (MapStatisticsEntry entry : mapStatistics.getEntries()) {
            size += entry.getColumnStatistics().getTotalValueSizeInBytes();
        }
        return size;
    }

    /**
     * Creates a copy of these statistics with the given bloom filter; the value
     * count and the map statistics instance are reused as-is.
     */
    @Override
    public ColumnStatistics withBloomFilter(HiveBloomFilter bloomFilter)
    {
        return new MapColumnStatistics(getNumberOfValues(), bloomFilter, mapStatistics);
    }

    /**
     * @return this instance's shallow size plus the inherited members' size plus
     * the retained size reported by the map statistics
     */
    @Override
    public long getRetainedSizeInBytes()
    {
        return INSTANCE_SIZE + getMembersSizeInBytes() + mapStatistics.getRetainedSizeInBytes();
    }

    /**
     * Feeds the inherited state and then the map statistics into {@code hasher}.
     */
    @Override
    public void addHash(StatisticsHasher hasher)
    {
        super.addHash(hasher);
        hasher.putOptionalHashable(mapStatistics);
    }

    @Override
    protected ToStringHelper getToStringHelper()
    {
        return super.getToStringHelper()
                .add("mapStatistics", mapStatistics);
    }

    /**
     * Two instances are equal when they are of exactly the same class, the
     * inherited state matches (per {@code equalsInternal}) and their map
     * statistics are equal.
     */
    @Override
    public boolean equals(Object o)
    {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        MapColumnStatistics that = (MapColumnStatistics) o;
        return equalsInternal(that) && Objects.equals(mapStatistics, that.mapStatistics);
    }

    // @Override was missing here; adding it lets the compiler verify that this
    // method really overrides Object#hashCode and keeps it paired with equals.
    @Override
    public int hashCode()
    {
        return Objects.hash(super.hashCode(), mapStatistics);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.orc.metadata.statistics;

import com.facebook.presto.common.block.Block;
import com.facebook.presto.common.type.Type;
import com.facebook.presto.orc.proto.DwrfProto;
import com.google.common.collect.ImmutableList;

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;

import static com.facebook.presto.orc.metadata.statistics.ColumnStatistics.mergeColumnStatistics;
import static java.util.Objects.requireNonNull;

/**
 * Collects per-key {@link ColumnStatistics} for a map column and assembles them
 * into a {@link MapStatistics} / {@link MapColumnStatistics}.
 *
 * <p>Statistics are supplied through {@link #addMapStatistics}; block-based
 * accumulation via {@link #addBlock} is not supported.
 */
public class MapColumnStatisticsBuilder
        implements StatisticsBuilder
{
    private final ImmutableList.Builder<MapStatisticsEntry> entries = new ImmutableList.Builder<>();
    // Tracks whether anything was added, since the list builder itself is not queried for emptiness.
    private boolean hasEntries;
    private long nonNullValueCount;

    /**
     * Always fails: this builder does not derive statistics from blocks.
     *
     * @throws UnsupportedOperationException on every call
     */
    @Override
    public void addBlock(Type type, Block block)
    {
        throw new UnsupportedOperationException();
    }

    /**
     * Records the statistics for one map key.
     *
     * <p>Note: MapColumnStatisticsBuilder doesn't check the uniqueness of the keys.
     *
     * @param key the map key the statistics belong to; must not be {@code null}
     * @param columnStatistics statistics for that key; must not be {@code null}
     * @throws NullPointerException if either argument is {@code null}
     */
    public void addMapStatistics(DwrfProto.KeyInfo key, ColumnStatistics columnStatistics)
    {
        requireNonNull(key, "key is null");
        requireNonNull(columnStatistics, "columnStatistics is null");

        // NOTE(review): the builder's value count is the running sum of the per-key
        // value counts -- confirm this is the intended count for the map column itself.
        nonNullValueCount += columnStatistics.getNumberOfValues();
        hasEntries = true;

        MapStatisticsEntry newEntry = new MapStatisticsEntry(key, columnStatistics);
        entries.add(newEntry);
    }

    /**
     * @return the collected entries wrapped in a {@link MapStatistics}, or empty
     * when no entry has been added
     */
    private Optional<MapStatistics> buildMapStatistics()
    {
        if (!hasEntries) {
            return Optional.empty();
        }
        return Optional.of(new MapStatistics(entries.build()));
    }

    /**
     * @return a {@link MapColumnStatistics} when at least one entry was added,
     * otherwise a plain {@link ColumnStatistics}; both carry the accumulated
     * value count and no bloom filter
     */
    @Override
    public ColumnStatistics buildColumnStatistics()
    {
        Optional<MapStatistics> builtMapStatistics = buildMapStatistics();
        if (!builtMapStatistics.isPresent()) {
            return new ColumnStatistics(nonNullValueCount, null);
        }
        return new MapColumnStatistics(nonNullValueCount, null, builtMapStatistics.get());
    }

    /**
     * Merges the map statistics of several column statistics objects.
     *
     * <p>Inputs whose value count is not positive are ignored. For the remaining
     * inputs, entries are grouped by key (in first-seen order) and each group is
     * merged with {@code mergeColumnStatistics}.
     *
     * @param stats the column statistics to merge
     * @return the merged map statistics; empty when any input with values lacks
     * map statistics, or when no entries were collected at all
     */
    public static Optional<MapStatistics> mergeMapStatistics(List<ColumnStatistics> stats)
    {
        // LinkedHashMap keeps keys in the order they were first encountered
        Map<DwrfProto.KeyInfo, List<ColumnStatistics>> statsGroupedByKey = new LinkedHashMap<>();

        for (ColumnStatistics partial : stats) {
            if (partial.getNumberOfValues() <= 0) {
                continue;
            }

            MapStatistics partialMapStatistics = partial.getMapStatistics();
            if (partialMapStatistics == null) {
                // there are non-null values but no statistics, so we can not say anything about the data
                return Optional.empty();
            }

            // group the per-key column stats so that each key can be merged in one go below
            for (MapStatisticsEntry partialEntry : partialMapStatistics.getEntries()) {
                statsGroupedByKey
                        .computeIfAbsent(partialEntry.getKey(), ignored -> new ArrayList<>())
                        .add(partialEntry.getColumnStatistics());
            }
        }

        // one merged entry per distinct key
        MapColumnStatisticsBuilder mergedBuilder = new MapColumnStatisticsBuilder();
        statsGroupedByKey.forEach((key, keyStats) ->
                mergedBuilder.addMapStatistics(key, mergeColumnStatistics(keyStats)));

        return mergedBuilder.buildMapStatistics();
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.orc.metadata.statistics;

import org.openjdk.jol.info.ClassLayout;

import java.util.List;
import java.util.Objects;

import static com.facebook.presto.orc.metadata.statistics.StatisticsHasher.Hashable;
import static com.google.common.base.MoreObjects.toStringHelper;
import static java.util.Objects.requireNonNull;

/**
 * Statistics of a map column expressed as a list of {@link MapStatisticsEntry}
 * objects. The list passed to the constructor is stored and returned as-is
 * (no copy is made).
 */
public class MapStatistics
        implements Hashable
{
    private static final int INSTANCE_SIZE = ClassLayout.parseClass(MapStatistics.class).instanceSize();
    private final List<MapStatisticsEntry> entries;

    /**
     * @param entries the statistics entries; must not be {@code null}
     * @throws NullPointerException if {@code entries} is {@code null}
     */
    public MapStatistics(List<MapStatisticsEntry> entries)
    {
        requireNonNull(entries, "entries is null");
        this.entries = entries;
    }

    /**
     * @return the same list instance that was handed to the constructor
     */
    public List<MapStatisticsEntry> getEntries()
    {
        return entries;
    }

    /**
     * @return this instance's shallow size plus the retained size reported by
     * each entry
     */
    public long getRetainedSizeInBytes()
    {
        long retainedByEntries = entries.stream()
                .mapToLong(MapStatisticsEntry::getRetainedSizeInBytes)
                .sum();
        return INSTANCE_SIZE + retainedByEntries;
    }

    /**
     * Instances are equal when they are of exactly the same class and their
     * entry lists are equal.
     */
    @Override
    public boolean equals(Object o)
    {
        if (this == o) {
            return true;
        }
        if (o != null && getClass() == o.getClass()) {
            MapStatistics other = (MapStatistics) o;
            return Objects.equals(entries, other.entries);
        }
        return false;
    }

    @Override
    public int hashCode()
    {
        return Objects.hash(entries);
    }

    @Override
    public String toString()
    {
        return toStringHelper(this)
                .add("entries", entries)
                .toString();
    }

    /**
     * Feeds every entry, in list order, into {@code hasher}.
     */
    @Override
    public void addHash(StatisticsHasher hasher)
    {
        entries.forEach(hasher::putOptionalHashable);
    }
}
Loading

0 comments on commit e53d846

Please sign in to comment.