-
Notifications
You must be signed in to change notification settings - Fork 3.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add local dynamic filter support in IcebergPageSourceProvider #5719
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,6 +19,7 @@ | |
import io.prestosql.metadata.Metadata; | ||
import io.prestosql.metadata.QualifiedObjectName; | ||
import io.prestosql.metadata.TableHandle; | ||
import io.prestosql.operator.OperatorStats; | ||
import io.prestosql.spi.connector.ColumnHandle; | ||
import io.prestosql.spi.connector.Constraint; | ||
import io.prestosql.spi.predicate.NullableValue; | ||
|
@@ -31,6 +32,7 @@ | |
import io.prestosql.testing.MaterializedResult; | ||
import io.prestosql.testing.MaterializedRow; | ||
import io.prestosql.testing.QueryRunner; | ||
import io.prestosql.testing.ResultWithQueryId; | ||
import org.apache.iceberg.FileFormat; | ||
import org.intellij.lang.annotations.Language; | ||
import org.testng.annotations.Test; | ||
|
@@ -45,9 +47,13 @@ | |
|
||
import static com.google.common.base.Preconditions.checkArgument; | ||
import static com.google.common.collect.Iterables.getOnlyElement; | ||
import static io.airlift.testing.Assertions.assertGreaterThan; | ||
import static io.airlift.testing.Assertions.assertLessThan; | ||
import static io.prestosql.SystemSessionProperties.JOIN_DISTRIBUTION_TYPE; | ||
import static io.prestosql.plugin.iceberg.IcebergQueryRunner.createIcebergQueryRunner; | ||
import static io.prestosql.spi.type.DoubleType.DOUBLE; | ||
import static io.prestosql.spi.type.VarcharType.VARCHAR; | ||
import static io.prestosql.sql.analyzer.FeaturesConfig.JoinDistributionType.BROADCAST; | ||
import static io.prestosql.testing.MaterializedResult.resultBuilder; | ||
import static io.prestosql.testing.assertions.Assert.assertEquals; | ||
import static io.prestosql.transaction.TransactionBuilder.transaction; | ||
|
@@ -1389,4 +1395,55 @@ private void dropTable(String table) | |
assertUpdate(session, "DROP TABLE " + table); | ||
assertFalse(getQueryRunner().tableExists(session, table)); | ||
} | ||
|
||
@Test | ||
public void testLocalDynamicFilterWithEmptyBuildSide() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These tests rely on the fact that the probe side will not progress until the build side is ready. Because of that, these tests are fragile and might break when we change execution (which we will). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could we tackle introducing waiting for split generation (like Hive does) separately, and let this one go through with less restrictive assertions? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This would prevent optimizations like #3957. Alternatively, we can simply add that blocking because it's pretty straightforward (just pass DF future to |
||
{ | ||
DistributedQueryRunner runner = (DistributedQueryRunner) getQueryRunner(); | ||
ResultWithQueryId<MaterializedResult> result = runner.executeWithQueryId( | ||
withBroadcastJoin(), | ||
"SELECT * FROM lineitem JOIN supplier ON lineitem.suppkey = supplier.suppkey AND supplier.name = 'abc'"); | ||
assertEquals(result.getResult().getRowCount(), 0); | ||
|
||
OperatorStats probeStats = searchScanFilterAndProjectOperatorStats(result.getQueryId(), "tpch.lineitem"); | ||
assertEquals(probeStats.getInputPositions(), 0); | ||
assertEquals(probeStats.getDynamicFilterSplitsProcessed(), probeStats.getTotalDrivers()); | ||
} | ||
|
||
@Test | ||
public void testDynamicFilterWithSelectiveBuildSide() | ||
{ | ||
DistributedQueryRunner runner = (DistributedQueryRunner) getQueryRunner(); | ||
ResultWithQueryId<MaterializedResult> result = runner.executeWithQueryId( | ||
withBroadcastJoin(), | ||
"SELECT * FROM lineitem JOIN orders ON lineitem.orderkey = orders.orderkey AND orders.totalprice = 172799.49"); | ||
assertGreaterThan(result.getResult().getRowCount(), 0); | ||
|
||
OperatorStats probeStats = searchScanFilterAndProjectOperatorStats(result.getQueryId(), "tpch.lineitem"); | ||
// Probe-side is partially scanned | ||
assertLessThan(probeStats.getInputPositions(), 60175L); | ||
assertEquals(probeStats.getDynamicFilterSplitsProcessed(), probeStats.getTotalDrivers()); | ||
} | ||
|
||
@Test | ||
public void testDynamicFilterWithNonSelectiveBuildSide() | ||
{ | ||
DistributedQueryRunner runner = (DistributedQueryRunner) getQueryRunner(); | ||
ResultWithQueryId<MaterializedResult> result = runner.executeWithQueryId( | ||
withBroadcastJoin(), | ||
"SELECT * FROM lineitem JOIN orders ON lineitem.orderkey = orders.orderkey"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please use |
||
assertGreaterThan(result.getResult().getRowCount(), 0); | ||
|
||
OperatorStats probeStats = searchScanFilterAndProjectOperatorStats(result.getQueryId(), "tpch.lineitem"); | ||
// Probe-side is fully scanned | ||
assertEquals(probeStats.getInputPositions(), 60175L); | ||
assertEquals(probeStats.getDynamicFilterSplitsProcessed(), 0); | ||
} | ||
|
||
private Session withBroadcastJoin() | ||
{ | ||
return Session.builder(this.getQueryRunner().getDefaultSession()) | ||
.setSystemProperty(JOIN_DISTRIBUTION_TYPE, BROADCAST.name()) | ||
.build(); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -29,6 +29,7 @@ | |
import io.prestosql.execution.warnings.WarningCollector; | ||
import io.prestosql.metadata.Metadata; | ||
import io.prestosql.operator.OperatorStats; | ||
import io.prestosql.server.DynamicFilterService; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. unused |
||
import io.prestosql.spi.QueryId; | ||
import io.prestosql.spi.type.Type; | ||
import io.prestosql.spi.type.TypeOperators; | ||
|
@@ -442,7 +443,7 @@ protected OperatorStats searchScanFilterAndProjectOperatorStats(QueryId queryId, | |
return false; | ||
} | ||
TableScanNode tableScanNode = (TableScanNode) filterNode.getSource(); | ||
return tableName.equals(tableScanNode.getTable().getConnectorHandle().toString()); | ||
return tableScanNode.getTable().getConnectorHandle().toString().contains(tableName); | ||
}) | ||
.findOnlyElement() | ||
.getId(); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If you passed
dynamicFilter
to IcebergPageSource
you could easily block on the DF in io.prestosql.spi.connector.ConnectorPageSource#isBlocked
. However, this should be behind a feature toggle. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It seems from the code that stripe pruning happens before
IcebergPageSource
is created, in createDataPageSource -> createOrcPageSource -> reader.createRecordReader -> OrcRecordReader
. So we would miss that even if we block in IcebergPageSource. I think row group pruning could still be accomplished by blocking on the DF in IcebergPageSource if
dynamicFilter
is pushed into StripeReader
as well. Not sure if the changes are worth doing, though. Please correct me if I'm missing something.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We could probably call
createDataPageSource
in IcebergPageSource
in a lazy way (when the DF is ready). This way we don't allocate resources until the DF is ready (I'm not sure it's that big of an issue, though).