Skip to content

Commit

Permalink
Ensure PartitionedOutputOperator is run with fixed local distribution
Browse files Browse the repository at this point in the history
PartitionedOutputOperator maintains buffers for each output partition.
When the operator is run in the same pipeline as the TableScanOperator, the
buffers are flushed after each split, resulting in small pages being created.
  • Loading branch information
arhimondr committed Nov 20, 2022
1 parent 2332ed7 commit 1c06202
Show file tree
Hide file tree
Showing 467 changed files with 12,714 additions and 10,515 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@ public final class SystemSessionProperties
public static final String USE_EXACT_PARTITIONING = "use_exact_partitioning";
public static final String FORCE_SPILLING_JOIN = "force_spilling_join";
public static final String FAULT_TOLERANT_EXECUTION_EVENT_DRIVEN_SCHEDULER_ENABLED = "fault_tolerant_execution_event_driven_scheduler_enabled";
public static final String FORCE_FIXED_DISTRIBUTION_FOR_PARTITIONED_OUTPUT_OPERATOR_ENABLED = "force_fixed_distribution_for_partitioned_output_operator_enabled";

private final List<PropertyMetadata<?>> sessionProperties;

Expand Down Expand Up @@ -865,6 +866,11 @@ public SystemSessionProperties(
FAULT_TOLERANT_EXECUTION_EVENT_DRIVEN_SCHEDULER_ENABLED,
"Enable event driven scheduler for fault tolerant execution",
queryManagerConfig.isFaultTolerantExecutionEventDrivenSchedulerEnabled(),
true),
booleanProperty(
FORCE_FIXED_DISTRIBUTION_FOR_PARTITIONED_OUTPUT_OPERATOR_ENABLED,
"Force partitioned output operator to be run with fixed distribution",
optimizerConfig.isForceFixedDistributionForPartitionedOutputOperatorEnabled(),
true));
}

Expand Down Expand Up @@ -1548,4 +1554,9 @@ public static boolean isFaultTolerantExecutionEventDriverSchedulerEnabled(Sessio
{
return session.getSystemProperty(FAULT_TOLERANT_EXECUTION_EVENT_DRIVEN_SCHEDULER_ENABLED, Boolean.class);
}

/**
 * Reads the {@code force_fixed_distribution_for_partitioned_output_operator_enabled}
 * system property from the given session.
 *
 * @param session the session whose system properties are consulted
 * @return the value of the property as registered under
 *         {@link #FORCE_FIXED_DISTRIBUTION_FOR_PARTITIONED_OUTPUT_OPERATOR_ENABLED}
 */
public static boolean isForceFixedDistributionForPartitionedOutputOperatorEnabled(Session session)
{
    Boolean forceFixedDistribution = session.getSystemProperty(
            FORCE_FIXED_DISTRIBUTION_FOR_PARTITIONED_OUTPUT_OPERATOR_ENABLED,
            Boolean.class);
    return forceFixedDistribution;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,8 @@ public class OptimizerConfig
private double adaptivePartialAggregationUniqueRowsRatioThreshold = 0.8;
private long joinPartitionedBuildMinRowCount = 1_000_000L;

// Whether the partitioned output operator is forced to run with fixed distribution; enabled by default.
private boolean forceFixedDistributionForPartitionedOutputOperatorEnabled = true;

public enum JoinReorderingStrategy
{
NONE,
Expand Down Expand Up @@ -756,4 +758,16 @@ public OptimizerConfig setUseExactPartitioning(boolean useExactPartitioning)
this.useExactPartitioning = useExactPartitioning;
return this;
}

/**
 * Returns the configured value of the flag that forces the partitioned output
 * operator to run with fixed distribution.
 *
 * @return the current value of the flag
 */
public boolean isForceFixedDistributionForPartitionedOutputOperatorEnabled()
{
    return this.forceFixedDistributionForPartitionedOutputOperatorEnabled;
}

/**
 * Sets the flag that forces the partitioned output operator to run with fixed
 * distribution. Bound to the configuration key
 * {@code experimental.force-fixed-distribution-for-partitioned-output-operator-enabled}.
 *
 * @param forceFixedDistributionForPartitionedOutputOperatorEnabled the new value of the flag
 * @return this config instance, so that setters can be chained
 */
@Config("experimental.force-fixed-distribution-for-partitioned-output-operator-enabled")
public OptimizerConfig setForceFixedDistributionForPartitionedOutputOperatorEnabled(boolean forceFixedDistributionForPartitionedOutputOperatorEnabled)
{
this.forceFixedDistributionForPartitionedOutputOperatorEnabled = forceFixedDistributionForPartitionedOutputOperatorEnabled;
return this;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -104,25 +104,33 @@ public class AddExchangesBelowPartialAggregationOverGroupIdRuleSet
typeOf(ExchangeNode.class)
.with(scope().equalTo(REMOTE))
.with(source().matching(
// PushPartialAggregationThroughExchange adds a projection. However, it can be removed if RemoveRedundantIdentityProjections is run in the meantime.
typeOf(ProjectNode.class).capturedAs(PROJECTION)
typeOf(ProjectNode.class)
.with(source().matching(
typeOf(AggregationNode.class).capturedAs(AGGREGATION)
.with(step().equalTo(AggregationNode.Step.PARTIAL))
.with(nonEmpty(groupingColumns()))
typeOf(ExchangeNode.class)
.with(scope().equalTo(LOCAL))
.with(source().matching(
typeOf(GroupIdNode.class).capturedAs(GROUP_ID)))))));
// PushPartialAggregationThroughExchange adds a projection. However, it can be removed if RemoveRedundantIdentityProjections is run in the meantime.
typeOf(ProjectNode.class).capturedAs(PROJECTION)
.with(source().matching(
typeOf(AggregationNode.class).capturedAs(AGGREGATION)
.with(step().equalTo(AggregationNode.Step.PARTIAL))
.with(nonEmpty(groupingColumns()))
.with(source().matching(
typeOf(GroupIdNode.class).capturedAs(GROUP_ID)))))))))));

private static final Pattern<ExchangeNode> WITHOUT_PROJECTION =
// If there was no exchange here, adding new exchanges could break property derivations logic of AddExchanges, AddLocalExchanges
typeOf(ExchangeNode.class)
.with(scope().equalTo(REMOTE))
.with(source().matching(
typeOf(AggregationNode.class).capturedAs(AGGREGATION)
.with(step().equalTo(AggregationNode.Step.PARTIAL))
.with(nonEmpty(groupingColumns()))
typeOf(ExchangeNode.class)
.with(scope().equalTo(LOCAL))
.with(source().matching(
typeOf(GroupIdNode.class).capturedAs(GROUP_ID)))));
typeOf(AggregationNode.class).capturedAs(AGGREGATION)
.with(step().equalTo(AggregationNode.Step.PARTIAL))
.with(nonEmpty(groupingColumns()))
.with(source().matching(
typeOf(GroupIdNode.class).capturedAs(GROUP_ID)))))));

private static final double GROUPING_SETS_SYMBOL_REQUIRED_FREQUENCY = 0.5;
private static final double ANTI_SKEWNESS_MARGIN = 3;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@
import static io.trino.SystemSessionProperties.getTaskPartitionedWriterCount;
import static io.trino.SystemSessionProperties.getTaskWriterCount;
import static io.trino.SystemSessionProperties.isDistributedSortEnabled;
import static io.trino.SystemSessionProperties.isForceFixedDistributionForPartitionedOutputOperatorEnabled;
import static io.trino.SystemSessionProperties.isSpillEnabled;
import static io.trino.SystemSessionProperties.isTaskScaleWritersEnabled;
import static io.trino.sql.ExpressionUtils.isEffectivelyLiteral;
Expand Down Expand Up @@ -743,6 +744,9 @@ public PlanWithProperties visitExchange(ExchangeNode node, StreamPreferredProper
any().withOrderSensitivity(),
any().withOrderSensitivity());
}
if (isForceFixedDistributionForPartitionedOutputOperatorEnabled(session) && node.isHashPartitionedExchange()) {
return planAndEnforceChildren(node, fixedParallelism(), defaultParallelism(session));
}
return planAndEnforceChildren(node, any(), defaultParallelism(session));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import static io.trino.sql.planner.SystemPartitioningHandle.FIXED_BROADCAST_DISTRIBUTION;
import static io.trino.sql.planner.SystemPartitioningHandle.FIXED_HASH_DISTRIBUTION;
import static io.trino.sql.planner.SystemPartitioningHandle.FIXED_PASSTHROUGH_DISTRIBUTION;
import static io.trino.sql.planner.SystemPartitioningHandle.SCALED_WRITER_HASH_DISTRIBUTION;
import static io.trino.sql.planner.SystemPartitioningHandle.SINGLE_DISTRIBUTION;
import static io.trino.sql.planner.plan.ExchangeNode.Scope.LOCAL;
import static io.trino.sql.planner.plan.ExchangeNode.Scope.REMOTE;
Expand Down Expand Up @@ -248,4 +249,11 @@ public PlanNode replaceChildren(List<PlanNode> newChildren)
{
return new ExchangeNode(getId(), type, scope, partitioningScheme, newChildren, inputs, orderingScheme);
}

/**
 * Reports whether this exchange is treated as hash partitioned.
 *
 * @return {@code true} when the partitioning handle equals
 *         {@code FIXED_HASH_DISTRIBUTION} or {@code SCALED_WRITER_HASH_DISTRIBUTION},
 *         or when the handle carries a catalog handle; {@code false} otherwise
 */
public boolean isHashPartitionedExchange()
{
    PartitioningHandle handle = partitioningScheme.getPartitioning().getHandle();
    if (handle.equals(FIXED_HASH_DISTRIBUTION)) {
        return true;
    }
    if (handle.equals(SCALED_WRITER_HASH_DISTRIBUTION)) {
        return true;
    }
    // catalog specific exchanges are hash partitioned exchanges with a catalog specific partition function
    return handle.getCatalogHandle().isPresent();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,8 @@ public void testDefaults()
.setAdaptivePartialAggregationMinRows(100_000)
.setAdaptivePartialAggregationUniqueRowsRatioThreshold(0.8)
.setJoinPartitionedBuildMinRowCount(1_000_000)
.setUseExactPartitioning(false));
.setUseExactPartitioning(false)
.setForceFixedDistributionForPartitionedOutputOperatorEnabled(true));
}

@Test
Expand Down Expand Up @@ -147,6 +148,7 @@ public void testExplicitPropertyMappings()
.put("adaptive-partial-aggregation.unique-rows-ratio-threshold", "0.99")
.put("optimizer.join-partitioned-build-min-row-count", "1")
.put("optimizer.use-exact-partitioning", "true")
.put("experimental.force-fixed-distribution-for-partitioned-output-operator-enabled", "false")
.buildOrThrow();

OptimizerConfig expected = new OptimizerConfig()
Expand Down Expand Up @@ -200,7 +202,8 @@ public void testExplicitPropertyMappings()
.setAdaptivePartialAggregationMinRows(1)
.setAdaptivePartialAggregationUniqueRowsRatioThreshold(0.99)
.setJoinPartitionedBuildMinRowCount(1)
.setUseExactPartitioning(true);
.setUseExactPartitioning(true)
.setForceFixedDistributionForPartitionedOutputOperatorEnabled(false);
assertFullMapping(properties, expected);
}
}
Loading

0 comments on commit 1c06202

Please sign in to comment.