From 3fda4bc8c9785742d0b13edba830c0791e42b98c Mon Sep 17 00:00:00 2001 From: James Taylor Date: Thu, 6 Feb 2020 14:17:02 -0800 Subject: [PATCH] Add non partition column filters to EXPLAIN (TYPE IO) (#25) --- presto-docs/src/main/sphinx/sql/explain.rst | 48 +++-- .../hive/TestHiveIntegrationSmokeTest.java | 172 +++++++++++++++++- .../planner/planprinter/IoPlanPrinter.java | 107 ++++++++++- .../tests/TestTpchDistributedQueries.java | 1 + 4 files changed, 299 insertions(+), 29 deletions(-) diff --git a/presto-docs/src/main/sphinx/sql/explain.rst b/presto-docs/src/main/sphinx/sql/explain.rst index 06b82fdcf405..de4e1a6ec7f3 100644 --- a/presto-docs/src/main/sphinx/sql/explain.rst +++ b/presto-docs/src/main/sphinx/sql/explain.rst @@ -111,7 +111,7 @@ IO: .. code-block:: none - presto:hive> EXPLAIN (TYPE IO, FORMAT JSON) INSERT INTO test_nation SELECT * FROM nation WHERE regionkey = 2; + presto:hive> EXPLAIN (TYPE IO, FORMAT JSON) INSERT INTO test_lineitem SELECT * FROM lineitem WHERE shipdate = '2020-02-01' AND quantity > 10; Query Plan ----------------------------------- { @@ -120,47 +120,63 @@ IO: "catalog" : "hive", "schemaTable" : { "schema" : "tpch", - "table" : "nation" + "table" : "lineitem" } }, - "columns" : [ { - "columnName" : "regionkey", - "type" : "bigint", + "columnConstraints" : [ { + "columnName" : "shipdate", + "type" : "varchar(10)", "domain" : { "nullsAllowed" : false, "ranges" : [ { "low" : { - "value" : "2", + "value" : "2020-02-01", "bound" : "EXACTLY" }, "high" : { - "value" : "2", + "value" : "2020-02-01", "bound" : "EXACTLY" } } ] } } ], + "columnFilters" : [ { + "columnName" : "quantity", + "type" : "double", + "domain" : { + "nullsAllowed" : false, + "ranges" : [ { + "low" : { + "value" : "10.0", + "bound" : "ABOVE" + }, + "high" : { + "bound" : "BELOW" + } + } ] + } + } ], "estimate" : { - "outputRowCount" : 15000.0, - "outputSizeInBytes" : 1597294.0, - "cpuCost" : 1597294.0, + "outputRowCount" : 60175.0, + "outputSizeInBytes" : 8077041.0, + 
"cpuCost" : 8077041.0, "maxMemory" : 0.0, "networkCost" : 0.0 - }, + } } ], "outputTable" : { "catalog" : "hive", "schemaTable" : { "schema" : "tpch", - "table" : "test_nation" + "table" : "test_lineitem" } }, "estimate" : { - "outputRowCount" : 15000.0, - "outputSizeInBytes" : 1597294.0, - "cpuCost" : 1597294.0, + "outputRowCount" : 49122.45, + "outputSizeInBytes" : 6593502.86, + "cpuCost" : 16154082.0, "maxMemory" : 0.0, - "networkCost" : 1597294.0 + "networkCost" : 6593502.86 } } diff --git a/presto-hive/src/test/java/io/prestosql/plugin/hive/TestHiveIntegrationSmokeTest.java b/presto-hive/src/test/java/io/prestosql/plugin/hive/TestHiveIntegrationSmokeTest.java index 486b83834658..9468496203f0 100644 --- a/presto-hive/src/test/java/io/prestosql/plugin/hive/TestHiveIntegrationSmokeTest.java +++ b/presto-hive/src/test/java/io/prestosql/plugin/hive/TestHiveIntegrationSmokeTest.java @@ -16,6 +16,9 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; +import io.airlift.json.JsonCodec; +import io.airlift.json.JsonCodecFactory; +import io.airlift.json.ObjectMapperProvider; import io.prestosql.Session; import io.prestosql.connector.CatalogName; import io.prestosql.cost.StatsAndCosts; @@ -61,7 +64,9 @@ import io.prestosql.testing.MaterializedRow; import io.prestosql.tests.AbstractTestIntegrationSmokeTest; import io.prestosql.tests.DistributedQueryRunner; +import io.prestosql.type.TypeDeserializer; import org.apache.hadoop.fs.Path; +import org.assertj.core.util.Sets; import org.intellij.lang.annotations.Language; import org.testng.annotations.Test; @@ -112,6 +117,7 @@ import static io.prestosql.plugin.hive.HiveTestUtils.TYPE_MANAGER; import static io.prestosql.plugin.hive.HiveUtil.columnExtraInfo; import static io.prestosql.plugin.hive.TestEventListenerPlugin.TestingEventListenerPlugin; +import static io.prestosql.spi.predicate.Marker.Bound.ABOVE; import static 
io.prestosql.spi.predicate.Marker.Bound.EXACTLY; import static io.prestosql.spi.security.SelectedRole.Type.ROLE; import static io.prestosql.spi.type.BigintType.BIGINT; @@ -464,7 +470,7 @@ public void testSchemaOperations() } @Test - public void testIOExplain() + public void testIoExplain() { // Test IO explain with small number of discrete components. computeActual("CREATE TABLE test_orders WITH (partitioned_by = ARRAY['orderkey', 'processing']) AS SELECT custkey, orderkey, orderstatus = 'P' processing FROM orders WHERE orderkey < 3"); @@ -472,7 +478,7 @@ public void testIOExplain() EstimatedStatsAndCost estimate = new EstimatedStatsAndCost(2.0, 40.0, 40.0, 0.0, 0.0); MaterializedResult result = computeActual("EXPLAIN (TYPE IO, FORMAT JSON) INSERT INTO test_orders SELECT custkey, orderkey, processing FROM test_orders WHERE custkey <= 10"); assertEquals( - jsonCodec(IoPlan.class).fromJson((String) getOnlyElement(result.getOnlyColumnAsSet())), + getIoPlanCodec().fromJson((String) getOnlyElement(result.getOnlyColumnAsSet())), new IoPlan( ImmutableSet.of( new TableColumnInfo( @@ -499,6 +505,16 @@ public void testIOExplain() new FormattedRange( new FormattedMarker(Optional.of("false"), EXACTLY), new FormattedMarker(Optional.of("false"), EXACTLY)))))), + ImmutableSet.of( + new ColumnConstraint( + "custkey", + BIGINT.getTypeSignature(), + new FormattedDomain( + false, + ImmutableSet.of( + new FormattedRange( + new FormattedMarker(Optional.empty(), ABOVE), + new FormattedMarker(Optional.of("10"), EXACTLY)))))), estimate)), Optional.of(new CatalogSchemaTableName(catalog, "tpch", "test_orders")), estimate)); @@ -511,7 +527,7 @@ public void testIOExplain() estimate = new EstimatedStatsAndCost(55.0, 990.0, 990.0, 0.0, 0.0); result = computeActual("EXPLAIN (TYPE IO, FORMAT JSON) INSERT INTO test_orders SELECT custkey, orderkey + 10 FROM test_orders WHERE custkey <= 10"); assertEquals( - jsonCodec(IoPlan.class).fromJson((String) getOnlyElement(result.getOnlyColumnAsSet())), + 
getIoPlanCodec().fromJson((String) getOnlyElement(result.getOnlyColumnAsSet())), new IoPlan( ImmutableSet.of( new TableColumnInfo( @@ -526,6 +542,16 @@ public void testIOExplain() new FormattedRange( new FormattedMarker(Optional.of("1"), EXACTLY), new FormattedMarker(Optional.of("199"), EXACTLY)))))), + ImmutableSet.of( + new ColumnConstraint( + "custkey", + BIGINT.getTypeSignature(), + new FormattedDomain( + false, + ImmutableSet.of( + new FormattedRange( + new FormattedMarker(Optional.empty(), ABOVE), + new FormattedMarker(Optional.of("10"), EXACTLY)))))), estimate)), Optional.of(new CatalogSchemaTableName(catalog, "tpch", "test_orders")), estimate)); @@ -533,6 +559,138 @@ public void testIOExplain() assertUpdate("DROP TABLE test_orders"); } + @Test + public void testIoExplainColumnFilters() + { + // Test IO explain with small number of discrete components. + computeActual("CREATE TABLE test_orders WITH (partitioned_by = ARRAY['orderkey']) AS SELECT custkey, orderstatus, orderkey FROM orders WHERE orderkey < 3"); + + EstimatedStatsAndCost estimate = new EstimatedStatsAndCost(2.0, 48.0, 48.0, 0.0, 0.0); + EstimatedStatsAndCost finalEstimate = new EstimatedStatsAndCost(0.0, 0.0, 96.0, 0.0, 0.0); + MaterializedResult result = computeActual("EXPLAIN (TYPE IO, FORMAT JSON) SELECT custkey, orderkey, orderstatus FROM test_orders WHERE custkey <= 10 and orderstatus='P'"); + assertEquals( + getIoPlanCodec().fromJson((String) getOnlyElement(result.getOnlyColumnAsSet())), + new IoPlan( + ImmutableSet.of( + new TableColumnInfo( + new CatalogSchemaTableName(catalog, "tpch", "test_orders"), + ImmutableSet.of( + new ColumnConstraint( + "orderkey", + BIGINT.getTypeSignature(), + new FormattedDomain( + false, + ImmutableSet.of( + new FormattedRange( + new FormattedMarker(Optional.of("1"), EXACTLY), + new FormattedMarker(Optional.of("1"), EXACTLY)), + new FormattedRange( + new FormattedMarker(Optional.of("2"), EXACTLY), + new FormattedMarker(Optional.of("2"), EXACTLY)))))), + 
Sets.newLinkedHashSet( + new ColumnConstraint( + "custkey", + BIGINT.getTypeSignature(), + new FormattedDomain( + false, + ImmutableSet.of( + new FormattedRange( + new FormattedMarker(Optional.empty(), ABOVE), + new FormattedMarker(Optional.of("10"), EXACTLY))))), + new ColumnConstraint( + "orderstatus", + VarcharType.createVarcharType(1).getTypeSignature(), + new FormattedDomain( + false, + ImmutableSet.of( + new FormattedRange( + new FormattedMarker(Optional.of("P"), EXACTLY), + new FormattedMarker(Optional.of("P"), EXACTLY)))))), + estimate)), + Optional.empty(), + finalEstimate)); + result = computeActual("EXPLAIN (TYPE IO, FORMAT JSON) SELECT custkey, orderkey, orderstatus FROM test_orders WHERE custkey <= 10 and (orderstatus='P' or orderstatus='S')"); + assertEquals( + getIoPlanCodec().fromJson((String) getOnlyElement(result.getOnlyColumnAsSet())), + new IoPlan( + ImmutableSet.of( + new TableColumnInfo( + new CatalogSchemaTableName(catalog, "tpch", "test_orders"), + ImmutableSet.of( + new ColumnConstraint( + "orderkey", + BIGINT.getTypeSignature(), + new FormattedDomain( + false, + ImmutableSet.of( + new FormattedRange( + new FormattedMarker(Optional.of("1"), EXACTLY), + new FormattedMarker(Optional.of("1"), EXACTLY)), + new FormattedRange( + new FormattedMarker(Optional.of("2"), EXACTLY), + new FormattedMarker(Optional.of("2"), EXACTLY)))))), + Sets.newLinkedHashSet( + new ColumnConstraint( + "orderstatus", + VarcharType.createVarcharType(1).getTypeSignature(), + new FormattedDomain( + false, + ImmutableSet.of( + new FormattedRange( + new FormattedMarker(Optional.of("P"), EXACTLY), + new FormattedMarker(Optional.of("P"), EXACTLY)), + new FormattedRange( + new FormattedMarker(Optional.of("S"), EXACTLY), + new FormattedMarker(Optional.of("S"), EXACTLY))))), + new ColumnConstraint( + "custkey", + BIGINT.getTypeSignature(), + new FormattedDomain( + false, + ImmutableSet.of( + new FormattedRange( + new FormattedMarker(Optional.empty(), ABOVE), + new 
FormattedMarker(Optional.of("10"), EXACTLY)))))), + estimate)), + Optional.empty(), + finalEstimate)); + result = computeActual("EXPLAIN (TYPE IO, FORMAT JSON) SELECT custkey, orderkey, orderstatus FROM test_orders WHERE custkey <= 10 and cast(orderstatus as integer) = 5"); + assertEquals( + getIoPlanCodec().fromJson((String) getOnlyElement(result.getOnlyColumnAsSet())), + new IoPlan( + ImmutableSet.of( + new TableColumnInfo( + new CatalogSchemaTableName(catalog, "tpch", "test_orders"), + ImmutableSet.of( + new ColumnConstraint( + "orderkey", + BIGINT.getTypeSignature(), + new FormattedDomain( + false, + ImmutableSet.of( + new FormattedRange( + new FormattedMarker(Optional.of("1"), EXACTLY), + new FormattedMarker(Optional.of("1"), EXACTLY)), + new FormattedRange( + new FormattedMarker(Optional.of("2"), EXACTLY), + new FormattedMarker(Optional.of("2"), EXACTLY)))))), + ImmutableSet.of( + new ColumnConstraint( + "custkey", + BIGINT.getTypeSignature(), + new FormattedDomain( + false, + ImmutableSet.of( + new FormattedRange( + new FormattedMarker(Optional.empty(), ABOVE), + new FormattedMarker(Optional.of("10"), EXACTLY)))))), + estimate)), + Optional.empty(), + finalEstimate)); + + assertUpdate("DROP TABLE test_orders"); + } + @Test public void testIoExplainWithPrimitiveTypes() { @@ -577,6 +735,7 @@ public void testIoExplainWithPrimitiveTypes() new FormattedRange( new FormattedMarker(Optional.of(entry.getKey().toString()), EXACTLY), new FormattedMarker(Optional.of(entry.getKey().toString()), EXACTLY)))))), + ImmutableSet.of(), estimate)), Optional.empty(), estimate), @@ -4612,6 +4771,13 @@ private List getAllTestingHiveStorageFormat() return formats.build(); } + private JsonCodec getIoPlanCodec() + { + ObjectMapperProvider objectMapperProvider = new ObjectMapperProvider(); + objectMapperProvider.setJsonDeserializers(ImmutableMap.of(Type.class, new TypeDeserializer(getQueryRunner().getMetadata()))); + return new 
JsonCodecFactory(objectMapperProvider).jsonCodec(IoPlan.class); + } + private static class TestingHiveStorageFormat { private final Session session; diff --git a/presto-main/src/main/java/io/prestosql/sql/planner/planprinter/IoPlanPrinter.java b/presto-main/src/main/java/io/prestosql/sql/planner/planprinter/IoPlanPrinter.java index 2b1e71b72f7b..1875bb2bbb9c 100644 --- a/presto-main/src/main/java/io/prestosql/sql/planner/planprinter/IoPlanPrinter.java +++ b/presto-main/src/main/java/io/prestosql/sql/planner/planprinter/IoPlanPrinter.java @@ -31,10 +31,13 @@ import io.prestosql.spi.predicate.Marker.Bound; import io.prestosql.spi.predicate.TupleDomain; import io.prestosql.spi.type.Type; +import io.prestosql.sql.planner.DomainTranslator; import io.prestosql.spi.type.TypeSignature; import io.prestosql.sql.planner.Plan; +import io.prestosql.sql.planner.plan.FilterNode; import io.prestosql.sql.planner.plan.PlanNode; import io.prestosql.sql.planner.plan.PlanVisitor; +import io.prestosql.sql.planner.plan.ProjectNode; import io.prestosql.sql.planner.plan.TableFinishNode; import io.prestosql.sql.planner.plan.TableScanNode; import io.prestosql.sql.planner.plan.TableWriterNode.CreateReference; @@ -44,6 +47,7 @@ import io.prestosql.sql.planner.plan.TableWriterNode.InsertTarget; import io.prestosql.sql.planner.plan.TableWriterNode.WriterTarget; import io.prestosql.sql.planner.planprinter.IoPlanPrinter.IoPlan.IoPlanBuilder; +import io.prestosql.sql.tree.Expression; import java.util.HashSet; import java.util.Map; @@ -53,6 +57,7 @@ import static com.google.common.base.MoreObjects.toStringHelper; import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.base.Preconditions.checkState; import static com.google.common.collect.ImmutableSet.toImmutableSet; import static io.airlift.json.JsonCodec.jsonCodec; import static io.prestosql.spi.predicate.Marker.Bound.EXACTLY; @@ -203,16 +208,19 @@ public static class TableColumnInfo { private final 
CatalogSchemaTableName table; private final Set columnConstraints; + private final Set columnFilters; private final EstimatedStatsAndCost estimate; @JsonCreator public TableColumnInfo( @JsonProperty("table") CatalogSchemaTableName table, @JsonProperty("columnConstraints") Set columnConstraints, + @JsonProperty("columnFilters") Set columnFilters, @JsonProperty("estimate") EstimatedStatsAndCost estimate) { this.table = requireNonNull(table, "table is null"); this.columnConstraints = requireNonNull(columnConstraints, "columnConstraints is null"); + this.columnFilters = requireNonNull(columnFilters, "columnFilters is null"); this.estimate = requireNonNull(estimate, "estimate is null"); } @@ -222,6 +230,12 @@ public CatalogSchemaTableName getTable() return table; } + @JsonProperty + public Set getColumnFilters() + { + return columnFilters; + } + @JsonProperty public Set getColumnConstraints() { @@ -246,13 +260,14 @@ public boolean equals(Object obj) TableColumnInfo o = (TableColumnInfo) obj; return Objects.equals(table, o.table) && Objects.equals(columnConstraints, o.columnConstraints) && + Objects.equals(columnFilters, o.columnFilters) && Objects.equals(estimate, o.estimate); } @Override public int hashCode() { - return Objects.hash(table, columnConstraints, estimate); + return Objects.hash(table, columnConstraints, columnFilters, estimate); } @Override @@ -261,6 +276,7 @@ public String toString() return toStringHelper(this) .add("table", table) .add("columnConstraints", columnConstraints) + .add("columnFilters", columnFilters) .add("estimate", estimate) .toString(); } @@ -601,19 +617,90 @@ protected Void visitPlan(PlanNode node, IoPlanBuilder context) } @Override - public Void visitTableScan(TableScanNode node, IoPlanBuilder context) + public Void visitFilter(FilterNode node, IoPlanBuilder context) + { + return visitScanFilterAndProjectInfo(node, Optional.of(node), Optional.empty(), context); + } + + @Override + public Void visitProject(ProjectNode node, IoPlanBuilder 
context) + { + if (node.getSource() instanceof FilterNode) { + return visitScanFilterAndProjectInfo(node, Optional.of((FilterNode) node.getSource()), Optional.of(node), context); + } + + return visitScanFilterAndProjectInfo(node, Optional.empty(), Optional.of(node), context); + } + + private Void visitScanFilterAndProjectInfo( + PlanNode node, + Optional filterNode, + Optional projectNode, + IoPlanBuilder context) + { + checkState(projectNode.isPresent() || filterNode.isPresent()); + + PlanNode sourceNode; + if (filterNode.isPresent()) { + sourceNode = filterNode.get().getSource(); + } + else { + sourceNode = projectNode.get().getSource(); + } + + Optional scanNode; + if (sourceNode instanceof TableScanNode) { + scanNode = Optional.of((TableScanNode) sourceNode); + } + else { + scanNode = Optional.empty(); + } + + if (scanNode.isPresent() && filterNode.isPresent()) { + addConstraintsFromFilter(filterNode.get(), scanNode.get(), context); + return null; + } + + sourceNode.accept(this, context); + return null; + } + + private void addConstraintsFromFilter(FilterNode filterNode, TableScanNode tableScanNode, IoPlanBuilder context) { - TableMetadata tableMetadata = metadata.getTableMetadata(session, node.getTable()); - TupleDomain predicate = metadata.getTableProperties(session, node.getTable()).getPredicate(); + Expression predicate = filterNode.getPredicate(); + DomainTranslator.ExtractionResult decomposedPredicate = DomainTranslator.fromPredicate( + metadata, + session, + predicate, + plan.getTypes()); + + TupleDomain filterDomain = decomposedPredicate.getTupleDomain() + .transform(tableScanNode.getAssignments()::get) + .intersect(tableScanNode.getEnforcedConstraint()); + addInputTableColumnInfo(tableScanNode, filterDomain, context); + } + + private void addInputTableColumnInfo(TableScanNode node, TupleDomain filters, IoPlanBuilder context) + { + TableHandle table = node.getTable(); + TableMetadata tableMetadata = metadata.getTableMetadata(session, table); + 
TupleDomain predicate = metadata.getTableProperties(session, table).getPredicate(); EstimatedStatsAndCost estimatedStatsAndCost = getEstimatedStatsAndCost(node); context.addInputTableColumnInfo( new IoPlan.TableColumnInfo( - new CatalogSchemaTableName( - tableMetadata.getCatalogName().getCatalogName(), - tableMetadata.getTable().getSchemaName(), - tableMetadata.getTable().getTableName()), - parseConstraints(node.getTable(), predicate), - estimatedStatsAndCost)); + new CatalogSchemaTableName( + tableMetadata.getCatalogName().getCatalogName(), + tableMetadata.getTable().getSchemaName(), + tableMetadata.getTable().getTableName()), + parseConstraints(table, predicate), + parseConstraints(table, filters), + estimatedStatsAndCost)); + } + + @Override + public Void visitTableScan(TableScanNode node, IoPlanBuilder context) + { + addInputTableColumnInfo(node, TupleDomain.all(), context); return null; } diff --git a/presto-tests/src/test/java/io/prestosql/tests/TestTpchDistributedQueries.java b/presto-tests/src/test/java/io/prestosql/tests/TestTpchDistributedQueries.java index ad0a4d730b3a..65118ef8571c 100644 --- a/presto-tests/src/test/java/io/prestosql/tests/TestTpchDistributedQueries.java +++ b/presto-tests/src/test/java/io/prestosql/tests/TestTpchDistributedQueries.java @@ -67,6 +67,7 @@ public void testIOExplain() new IoPlanPrinter.FormattedRange( new IoPlanPrinter.FormattedMarker(Optional.of("P"), EXACTLY), new IoPlanPrinter.FormattedMarker(Optional.of("P"), EXACTLY)))))), + ImmutableSet.of(), scanEstimate); assertEquals( jsonCodec(IoPlanPrinter.IoPlan.class).fromJson((String) getOnlyElement(result.getOnlyColumnAsSet())),