From e53d8464907a30d65f78c17a783f3b6f1b686c39 Mon Sep 17 00:00:00 2001 From: Sergii Druzkin <65374769+sdruzkin@users.noreply.github.com> Date: Thu, 23 Jun 2022 17:32:44 -0700 Subject: [PATCH] Add metadata classes for map statistics --- .../orc/metadata/DwrfMetadataReader.java | 1 + .../orc/metadata/OrcMetadataReader.java | 1 + .../metadata/statistics/ColumnStatistics.java | 12 ++ .../statistics/MapColumnStatistics.java | 94 +++++++++++ .../MapColumnStatisticsBuilder.java | 102 ++++++++++++ .../metadata/statistics/MapStatistics.java | 82 ++++++++++ .../statistics/MapStatisticsEntry.java | 98 +++++++++++ .../statistics/TestMapColumnStatistics.java | 69 ++++++++ .../TestMapColumnStatisticsBuilder.java | 154 ++++++++++++++++++ 9 files changed, 613 insertions(+) create mode 100644 presto-orc/src/main/java/com/facebook/presto/orc/metadata/statistics/MapColumnStatistics.java create mode 100644 presto-orc/src/main/java/com/facebook/presto/orc/metadata/statistics/MapColumnStatisticsBuilder.java create mode 100644 presto-orc/src/main/java/com/facebook/presto/orc/metadata/statistics/MapStatistics.java create mode 100644 presto-orc/src/main/java/com/facebook/presto/orc/metadata/statistics/MapStatisticsEntry.java create mode 100644 presto-orc/src/test/java/com/facebook/presto/orc/metadata/statistics/TestMapColumnStatistics.java create mode 100644 presto-orc/src/test/java/com/facebook/presto/orc/metadata/statistics/TestMapColumnStatisticsBuilder.java diff --git a/presto-orc/src/main/java/com/facebook/presto/orc/metadata/DwrfMetadataReader.java b/presto-orc/src/main/java/com/facebook/presto/orc/metadata/DwrfMetadataReader.java index b5e45f3d6613..8cf4010e978b 100644 --- a/presto-orc/src/main/java/com/facebook/presto/orc/metadata/DwrfMetadataReader.java +++ b/presto-orc/src/main/java/com/facebook/presto/orc/metadata/DwrfMetadataReader.java @@ -485,6 +485,7 @@ private static ColumnStatistics toColumnStatistics(HiveWriterVersion hiveWriterV null, null, statistics.hasBinaryStatistics() ? toBinaryStatistics(statistics.getBinaryStatistics()) : null, + null, bloomFilter); } diff --git a/presto-orc/src/main/java/com/facebook/presto/orc/metadata/OrcMetadataReader.java b/presto-orc/src/main/java/com/facebook/presto/orc/metadata/OrcMetadataReader.java index d8b512ddd161..e85077b22824 100644 --- a/presto-orc/src/main/java/com/facebook/presto/orc/metadata/OrcMetadataReader.java +++ b/presto-orc/src/main/java/com/facebook/presto/orc/metadata/OrcMetadataReader.java @@ -276,6 +276,7 @@ private static ColumnStatistics toColumnStatistics(HiveWriterVersion hiveWriterV statistics.hasDateStatistics() ? toDateStatistics(hiveWriterVersion, statistics.getDateStatistics(), isRowGroup) : null, statistics.hasDecimalStatistics() ? toDecimalStatistics(statistics.getDecimalStatistics()) : null, statistics.hasBinaryStatistics() ? toBinaryStatistics(statistics.getBinaryStatistics()) : null, + null, bloomFilter); } diff --git a/presto-orc/src/main/java/com/facebook/presto/orc/metadata/statistics/ColumnStatistics.java b/presto-orc/src/main/java/com/facebook/presto/orc/metadata/statistics/ColumnStatistics.java index 5c69f02d37eb..cfa48a520db1 100644 --- a/presto-orc/src/main/java/com/facebook/presto/orc/metadata/statistics/ColumnStatistics.java +++ b/presto-orc/src/main/java/com/facebook/presto/orc/metadata/statistics/ColumnStatistics.java @@ -26,6 +26,7 @@ import static com.facebook.presto.orc.metadata.statistics.DoubleStatisticsBuilder.mergeDoubleStatistics; import static com.facebook.presto.orc.metadata.statistics.IntegerStatisticsBuilder.mergeIntegerStatistics; import static com.facebook.presto.orc.metadata.statistics.LongDecimalStatisticsBuilder.mergeDecimalStatistics; +import static com.facebook.presto.orc.metadata.statistics.MapColumnStatisticsBuilder.mergeMapStatistics; import static com.facebook.presto.orc.metadata.statistics.StringStatisticsBuilder.mergeStringStatistics; import static com.google.common.base.MoreObjects.toStringHelper; @@ -107,6 +108,11 @@ public BinaryStatistics getBinaryStatistics() return null; } + public MapStatistics getMapStatistics() + { + return null; + } + public HiveBloomFilter getBloomFilter() { return bloomFilter; @@ -196,6 +202,7 @@ public static ColumnStatistics mergeColumnStatistics(List stat mergeDateStatistics(stats).orElse(null), mergeDecimalStatistics(stats).orElse(null), mergeBinaryStatistics(stats).orElse(null), + mergeMapStatistics(stats).orElse(null), null); } @@ -208,6 +215,7 @@ public static ColumnStatistics createColumnStatistics( DateStatistics dateStatistics, DecimalStatistics decimalStatistics, BinaryStatistics binaryStatistics, + MapStatistics mapStatistics, HiveBloomFilter bloomFilter) { if (booleanStatistics != null) { @@ -238,6 +246,10 @@ public static ColumnStatistics createColumnStatistics( return new BinaryColumnStatistics(numberOfValues, bloomFilter, binaryStatistics); } + if (mapStatistics != null) { + return new MapColumnStatistics(numberOfValues, bloomFilter, mapStatistics); + } + return new ColumnStatistics(numberOfValues, bloomFilter); } } diff --git a/presto-orc/src/main/java/com/facebook/presto/orc/metadata/statistics/MapColumnStatistics.java b/presto-orc/src/main/java/com/facebook/presto/orc/metadata/statistics/MapColumnStatistics.java new file mode 100644 index 000000000000..edf7f650ae75 --- /dev/null +++ b/presto-orc/src/main/java/com/facebook/presto/orc/metadata/statistics/MapColumnStatistics.java @@ -0,0 +1,94 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.orc.metadata.statistics; + +import com.google.common.base.MoreObjects.ToStringHelper; +import org.openjdk.jol.info.ClassLayout; + +import java.util.Objects; + +import static java.util.Objects.requireNonNull; + +public class MapColumnStatistics + extends ColumnStatistics +{ + private static final int INSTANCE_SIZE = ClassLayout.parseClass(MapColumnStatistics.class).instanceSize(); + private final MapStatistics mapStatistics; + + public MapColumnStatistics(Long numberOfValues, HiveBloomFilter bloomFilter, MapStatistics mapStatistics) + { + super(numberOfValues, bloomFilter); + this.mapStatistics = requireNonNull(mapStatistics, "mapStatistics is null"); + } + + @Override + public MapStatistics getMapStatistics() + { + return mapStatistics; + } + + @Override + public long getTotalValueSizeInBytes() + { + long size = 0; + for (MapStatisticsEntry entry : mapStatistics.getEntries()) { + size += entry.getColumnStatistics().getTotalValueSizeInBytes(); + } + return size; + } + + @Override + public ColumnStatistics withBloomFilter(HiveBloomFilter bloomFilter) + { + return new MapColumnStatistics(getNumberOfValues(), bloomFilter, mapStatistics); + } + + @Override + public long getRetainedSizeInBytes() + { + return INSTANCE_SIZE + getMembersSizeInBytes() + mapStatistics.getRetainedSizeInBytes(); + } + + @Override + public void addHash(StatisticsHasher hasher) + { + super.addHash(hasher); + hasher.putOptionalHashable(mapStatistics); + } + + @Override + protected ToStringHelper getToStringHelper() + { + return super.getToStringHelper() + .add("mapStatistics", mapStatistics); + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + MapColumnStatistics that = (MapColumnStatistics) o; + return equalsInternal(that) && Objects.equals(mapStatistics, that.mapStatistics); + } + + public int hashCode() + { + return Objects.hash(super.hashCode(), mapStatistics); + } +} diff --git a/presto-orc/src/main/java/com/facebook/presto/orc/metadata/statistics/MapColumnStatisticsBuilder.java b/presto-orc/src/main/java/com/facebook/presto/orc/metadata/statistics/MapColumnStatisticsBuilder.java new file mode 100644 index 000000000000..5b533555fc43 --- /dev/null +++ b/presto-orc/src/main/java/com/facebook/presto/orc/metadata/statistics/MapColumnStatisticsBuilder.java @@ -0,0 +1,102 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.orc.metadata.statistics; + +import com.facebook.presto.common.block.Block; +import com.facebook.presto.common.type.Type; +import com.facebook.presto.orc.proto.DwrfProto; +import com.google.common.collect.ImmutableList; + +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; + +import static com.facebook.presto.orc.metadata.statistics.ColumnStatistics.mergeColumnStatistics; +import static java.util.Objects.requireNonNull; + +public class MapColumnStatisticsBuilder + implements StatisticsBuilder +{ + private long nonNullValueCount; + private boolean hasEntries; + private final ImmutableList.Builder entries = new ImmutableList.Builder<>(); + + @Override + public void addBlock(Type type, Block block) + { + throw new UnsupportedOperationException(); + } + + // Note: MapColumnStatisticsBuilder doesn't check the uniqueness of the keys + public void addMapStatistics(DwrfProto.KeyInfo key, ColumnStatistics columnStatistics) + { + requireNonNull(key, "key is null"); + requireNonNull(columnStatistics, "columnStatistics is null"); + nonNullValueCount += columnStatistics.getNumberOfValues(); + hasEntries = true; + entries.add(new MapStatisticsEntry(key, columnStatistics)); + } + + private Optional buildMapStatistics() + { + if (hasEntries) { + MapStatistics mapStatistics = new MapStatistics(entries.build()); + return Optional.of(mapStatistics); + } + return Optional.empty(); + } + + @Override + public ColumnStatistics buildColumnStatistics() + { + if (hasEntries) { + MapStatistics mapStatistics = new MapStatistics(entries.build()); + return new MapColumnStatistics(nonNullValueCount, null, mapStatistics); + } + return new ColumnStatistics(nonNullValueCount, null); + } + + public static Optional mergeMapStatistics(List stats) + { + Map> columnStatisticsByKey = new LinkedHashMap<>(); + + for (ColumnStatistics columnStatistics : stats) { + if (columnStatistics.getNumberOfValues() > 0) { + MapStatistics partialStatistics = columnStatistics.getMapStatistics(); + if (partialStatistics == null) { + // there are non-null values but no statistics, so we can not say anything about the data + return Optional.empty(); + } + + // collect column stats for each key for merging later + for (MapStatisticsEntry entry : partialStatistics.getEntries()) { + List allKeyStats = columnStatisticsByKey.computeIfAbsent(entry.getKey(), (k) -> new ArrayList<>()); + allKeyStats.add(entry.getColumnStatistics()); + } + } + } + + // merge all column stats for each key + MapColumnStatisticsBuilder mapStatisticsBuilder = new MapColumnStatisticsBuilder(); + for (Map.Entry> entry : columnStatisticsByKey.entrySet()) { + ColumnStatistics mergedColumnStatistics = mergeColumnStatistics(entry.getValue()); + DwrfProto.KeyInfo key = entry.getKey(); + mapStatisticsBuilder.addMapStatistics(key, mergedColumnStatistics); + } + + return mapStatisticsBuilder.buildMapStatistics(); + } +} diff --git a/presto-orc/src/main/java/com/facebook/presto/orc/metadata/statistics/MapStatistics.java b/presto-orc/src/main/java/com/facebook/presto/orc/metadata/statistics/MapStatistics.java new file mode 100644 index 000000000000..354e2f9e12b3 --- /dev/null +++ b/presto-orc/src/main/java/com/facebook/presto/orc/metadata/statistics/MapStatistics.java @@ -0,0 +1,82 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.orc.metadata.statistics; + +import org.openjdk.jol.info.ClassLayout; + +import java.util.List; +import java.util.Objects; + +import static com.facebook.presto.orc.metadata.statistics.StatisticsHasher.Hashable; +import static com.google.common.base.MoreObjects.toStringHelper; +import static java.util.Objects.requireNonNull; + +public class MapStatistics + implements Hashable +{ + private static final int INSTANCE_SIZE = ClassLayout.parseClass(MapStatistics.class).instanceSize(); + private final List entries; + + public MapStatistics(List entries) + { + this.entries = requireNonNull(entries, "entries is null"); + } + + public List getEntries() + { + return entries; + } + + public long getRetainedSizeInBytes() + { + long entriesSize = 0; + for (MapStatisticsEntry entry : entries) { + entriesSize += entry.getRetainedSizeInBytes(); + } + return INSTANCE_SIZE + entriesSize; + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + MapStatistics that = (MapStatistics) o; + return Objects.equals(entries, that.entries); + } + + @Override + public int hashCode() + { + return Objects.hash(entries); + } + + @Override + public String toString() + { + return toStringHelper(this) + .add("entries", entries) + .toString(); + } + + @Override + public void addHash(StatisticsHasher hasher) + { + entries.forEach(hasher::putOptionalHashable); + } +} diff --git a/presto-orc/src/main/java/com/facebook/presto/orc/metadata/statistics/MapStatisticsEntry.java b/presto-orc/src/main/java/com/facebook/presto/orc/metadata/statistics/MapStatisticsEntry.java new file mode 100644 index 000000000000..381489f21bb7 --- /dev/null +++ b/presto-orc/src/main/java/com/facebook/presto/orc/metadata/statistics/MapStatisticsEntry.java @@ -0,0 +1,98 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.orc.metadata.statistics; + +import com.facebook.presto.orc.metadata.statistics.StatisticsHasher.Hashable; +import com.facebook.presto.orc.proto.DwrfProto; +import org.openjdk.jol.info.ClassLayout; + +import java.util.Objects; + +import static com.google.common.base.MoreObjects.toStringHelper; +import static java.util.Objects.requireNonNull; + +public class MapStatisticsEntry + implements Hashable +{ + private static final int INSTANCE_SIZE = ClassLayout.parseClass(MapStatisticsEntry.class).instanceSize(); + private static final int KEY_INSTANCE_SIZE = ClassLayout.parseClass(DwrfProto.KeyInfo.class).instanceSize(); + private final DwrfProto.KeyInfo key; + private final ColumnStatistics columnStatistics; + + public MapStatisticsEntry(DwrfProto.KeyInfo key, ColumnStatistics columnStatistics) + { + this.key = requireNonNull(key, "key is null"); + this.columnStatistics = requireNonNull(columnStatistics, "columnStatistics is null"); + } + + public DwrfProto.KeyInfo getKey() + { + return key; + } + + public ColumnStatistics getColumnStatistics() + { + return columnStatistics; + } + + public long getRetainedSizeInBytes() + { + long keySize = KEY_INSTANCE_SIZE; + if (key.hasBytesKey()) { + keySize += Byte.BYTES * key.getBytesKey().size(); + } + return INSTANCE_SIZE + keySize + columnStatistics.getRetainedSizeInBytes(); + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + MapStatisticsEntry that = (MapStatisticsEntry) o; + return Objects.equals(key, that.key) && + Objects.equals(columnStatistics, that.columnStatistics); + } + + @Override + public int hashCode() + { + return Objects.hash(key, columnStatistics); + } + + @Override + public String toString() + { + return toStringHelper(this) + .add("key", key.hasBytesKey() ? key.getBytesKey() : key.getIntKey()) + .add("columnStatistics", columnStatistics) + .toString(); + } + + @Override + public void addHash(StatisticsHasher hasher) + { + if (key.hasBytesKey()) { + hasher.putBytes(key.getBytesKey().asReadOnlyByteBuffer()); + } + else { + hasher.putLong(key.getIntKey()); + } + hasher.putOptionalHashable(columnStatistics); + } +} diff --git a/presto-orc/src/test/java/com/facebook/presto/orc/metadata/statistics/TestMapColumnStatistics.java b/presto-orc/src/test/java/com/facebook/presto/orc/metadata/statistics/TestMapColumnStatistics.java new file mode 100644 index 000000000000..60f30b65bae4 --- /dev/null +++ b/presto-orc/src/test/java/com/facebook/presto/orc/metadata/statistics/TestMapColumnStatistics.java @@ -0,0 +1,69 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.orc.metadata.statistics; + +import com.facebook.presto.orc.proto.DwrfProto.KeyInfo; +import com.facebook.presto.orc.protobuf.ByteString; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNotEquals; + +public class TestMapColumnStatistics +{ + private static final KeyInfo INT_KEY1 = KeyInfo.newBuilder().setIntKey(1).build(); + private static final KeyInfo INT_KEY2 = KeyInfo.newBuilder().setIntKey(2).build(); + private static final KeyInfo INT_KEY3 = KeyInfo.newBuilder().setIntKey(3).build(); + private static final KeyInfo STRING_KEY1 = KeyInfo.newBuilder().setBytesKey(ByteString.copyFromUtf8("s1")).build(); + private static final KeyInfo STRING_KEY2 = KeyInfo.newBuilder().setBytesKey(ByteString.copyFromUtf8("s2")).build(); + private static final KeyInfo STRING_KEY3 = KeyInfo.newBuilder().setBytesKey(ByteString.copyFromUtf8("s3")).build(); + + @DataProvider + public Object[][] keySupplier() + { + return new Object[][] { + {INT_KEY1, INT_KEY2, INT_KEY3}, + {STRING_KEY1, STRING_KEY2, STRING_KEY3}, + }; + } + + @Test(dataProvider = "keySupplier") + public void testEqualsHashCode(KeyInfo[] keys) + { + MapColumnStatisticsBuilder builder1 = new MapColumnStatisticsBuilder(); + builder1.addMapStatistics(keys[0], new ColumnStatistics(3L, null)); + builder1.addMapStatistics(keys[1], new ColumnStatistics(5L, null)); + ColumnStatistics columnStatistics1 = builder1.buildColumnStatistics(); + + // same as builder1 + MapColumnStatisticsBuilder builder2 = new MapColumnStatisticsBuilder(); + builder2.addMapStatistics(keys[0], new ColumnStatistics(3L, null)); + builder2.addMapStatistics(keys[1], new ColumnStatistics(5L, null)); + ColumnStatistics columnStatistics2 = builder2.buildColumnStatistics(); + + MapColumnStatisticsBuilder builder3 = new MapColumnStatisticsBuilder(); + builder3.addMapStatistics(keys[1], new ColumnStatistics(5L, null)); + builder3.addMapStatistics(keys[2], new ColumnStatistics(6L, null)); + ColumnStatistics columnStatistics3 = builder3.buildColumnStatistics(); + + // 1 and 2 should be equal + assertEquals(columnStatistics1, columnStatistics2); + assertEquals(columnStatistics1.hashCode(), columnStatistics2.hashCode()); + + // 2 and 3 should be not equal + assertNotEquals(columnStatistics2, columnStatistics3); + assertNotEquals(columnStatistics2.hashCode(), columnStatistics3.hashCode()); + } +} diff --git a/presto-orc/src/test/java/com/facebook/presto/orc/metadata/statistics/TestMapColumnStatisticsBuilder.java b/presto-orc/src/test/java/com/facebook/presto/orc/metadata/statistics/TestMapColumnStatisticsBuilder.java new file mode 100644 index 000000000000..5ff184e6b9b1 --- /dev/null +++ b/presto-orc/src/test/java/com/facebook/presto/orc/metadata/statistics/TestMapColumnStatisticsBuilder.java @@ -0,0 +1,154 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.orc.metadata.statistics; + +import com.facebook.presto.orc.proto.DwrfProto.KeyInfo; +import com.facebook.presto.orc.protobuf.ByteString; +import com.google.common.collect.ImmutableList; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertNull; + +public class TestMapColumnStatisticsBuilder +{ + private static final KeyInfo INT_KEY1 = KeyInfo.newBuilder().setIntKey(1).build(); + private static final KeyInfo INT_KEY2 = KeyInfo.newBuilder().setIntKey(2).build(); + private static final KeyInfo INT_KEY3 = KeyInfo.newBuilder().setIntKey(3).build(); + private static final KeyInfo STRING_KEY1 = KeyInfo.newBuilder().setBytesKey(ByteString.copyFromUtf8("s1")).build(); + private static final KeyInfo STRING_KEY2 = KeyInfo.newBuilder().setBytesKey(ByteString.copyFromUtf8("s2")).build(); + private static final KeyInfo STRING_KEY3 = KeyInfo.newBuilder().setBytesKey(ByteString.copyFromUtf8("s3")).build(); + + @DataProvider + public Object[][] keySupplier() + { + return new Object[][] { + {INT_KEY1, INT_KEY2, INT_KEY3}, + {STRING_KEY1, STRING_KEY2, STRING_KEY3}, + }; + } + + @Test + public void testAddMapStatisticsNoValues() + { + MapColumnStatisticsBuilder builder = new MapColumnStatisticsBuilder(); + ColumnStatistics columnStatistics = builder.buildColumnStatistics(); + assertEquals(columnStatistics.getClass(), ColumnStatistics.class); + assertEquals(columnStatistics.getNumberOfValues(), 0); + assertNull(columnStatistics.getMapStatistics()); + } + + @Test(dataProvider = "keySupplier") + public void testAddMapStatistics(KeyInfo[] keys) + { + KeyInfo key1 = keys[0]; + KeyInfo key2 = keys[1]; + + ColumnStatistics columnStatistics1 = new ColumnStatistics(3L, null); + ColumnStatistics columnStatistics2 = new ColumnStatistics(5L, null); + + MapColumnStatisticsBuilder builder = new MapColumnStatisticsBuilder(); + builder.addMapStatistics(key1, columnStatistics1); + builder.addMapStatistics(key2, columnStatistics2); + + MapColumnStatistics columnStatistics = (MapColumnStatistics) builder.buildColumnStatistics(); + assertEquals(columnStatistics.getNumberOfValues(), 8); + + MapStatistics mapStatistics = columnStatistics.getMapStatistics(); + List entries = mapStatistics.getEntries(); + assertEquals(entries.size(), 2); + assertEquals(entries.get(0).getKey(), key1); + assertEquals(entries.get(0).getColumnStatistics(), columnStatistics1); + assertEquals(entries.get(1).getKey(), key2); + assertEquals(entries.get(1).getColumnStatistics(), columnStatistics2); + } + + @Test(dataProvider = "keySupplier") + public void testMergeMapStatistics(KeyInfo[] keys) + { + // merge two stats with keys: [k0,k1] and [k1,k2] + // column statistics for k1 should be merged together + MapColumnStatisticsBuilder builder1 = new MapColumnStatisticsBuilder(); + builder1.addMapStatistics(keys[0], new IntegerColumnStatistics(3L, null, new IntegerStatistics(1L, 2L, 3L))); + builder1.addMapStatistics(keys[1], new IntegerColumnStatistics(5L, null, new IntegerStatistics(10L, 20L, 30L))); + ColumnStatistics columnStatistics1 = builder1.buildColumnStatistics(); + + MapColumnStatisticsBuilder builder2 = new MapColumnStatisticsBuilder(); + builder2.addMapStatistics(keys[1], new IntegerColumnStatistics(7L, null, new IntegerStatistics(25L, 95L, 100L))); + builder2.addMapStatistics(keys[2], new IntegerColumnStatistics(9L, null, new IntegerStatistics(12L, 22L, 32L))); + ColumnStatistics columnStatistics2 = builder2.buildColumnStatistics(); + + MapStatistics mergedMapStatistics = MapColumnStatisticsBuilder.mergeMapStatistics(ImmutableList.of(columnStatistics1, columnStatistics2)).get(); + assertMergedMapStatistics(keys, mergedMapStatistics); + } + + @Test(dataProvider = "keySupplier") + public void testMergeMapStatisticsMissingStats(KeyInfo[] keys) + { + // valid map stat + MapColumnStatisticsBuilder builder1 = new MapColumnStatisticsBuilder(); + builder1.addMapStatistics(keys[0], new ColumnStatistics(3L, null)); + ColumnStatistics columnStatistics1 = builder1.buildColumnStatistics(); + + // invalid map stat + ColumnStatistics columnStatistics2 = new ColumnStatistics(7L, null); + + Optional mergedMapStats = MapColumnStatisticsBuilder.mergeMapStatistics(ImmutableList.of(columnStatistics1, columnStatistics2)); + assertFalse(mergedMapStats.isPresent()); + } + + @Test(dataProvider = "keySupplier") + public void testMergeColumnStatistics(KeyInfo[] keys) + { + // merge two stats with keys: [k0,k1] and [k1,k2] + // column statistics for k1 should be merged together + MapColumnStatisticsBuilder builder1 = new MapColumnStatisticsBuilder(); + builder1.addMapStatistics(keys[0], new IntegerColumnStatistics(3L, null, new IntegerStatistics(1L, 2L, 3L))); + builder1.addMapStatistics(keys[1], new IntegerColumnStatistics(5L, null, new IntegerStatistics(10L, 20L, 30L))); + ColumnStatistics columnStatistics1 = builder1.buildColumnStatistics(); + + MapColumnStatisticsBuilder builder2 = new MapColumnStatisticsBuilder(); + builder2.addMapStatistics(keys[1], new IntegerColumnStatistics(7L, null, new IntegerStatistics(25L, 95L, 100L))); + builder2.addMapStatistics(keys[2], new IntegerColumnStatistics(9L, null, new IntegerStatistics(12L, 22L, 32L))); + ColumnStatistics columnStatistics2 = builder2.buildColumnStatistics(); + + ColumnStatistics mergedColumnStatistics = ColumnStatistics.mergeColumnStatistics(ImmutableList.of(columnStatistics1, columnStatistics2)); + MapStatistics mergedMapStatistics = mergedColumnStatistics.getMapStatistics(); + assertMergedMapStatistics(keys, mergedMapStatistics); + } + + private void assertMergedMapStatistics(KeyInfo[] keys, MapStatistics mergedMapStatistics) + { + assertNotNull(mergedMapStatistics); + List entries = mergedMapStatistics.getEntries(); + + assertEquals(entries.size(), 3); + Map columnStatisticsByKey = new HashMap<>(); + for (MapStatisticsEntry entry : entries) { + columnStatisticsByKey.put(entry.getKey(), entry.getColumnStatistics()); + } + + assertEquals(columnStatisticsByKey.get(keys[0]), new IntegerColumnStatistics(3L, null, new IntegerStatistics(1L, 2L, 3L))); + assertEquals(columnStatisticsByKey.get(keys[1]), new IntegerColumnStatistics(12L, null, new IntegerStatistics(10L, 95L, 130L))); // merged stats + assertEquals(columnStatisticsByKey.get(keys[2]), new IntegerColumnStatistics(9L, null, new IntegerStatistics(12L, 22L, 32L))); + } +}