Skip to content

Commit

Permalink
Fix crash in get_aggsplit
Browse files Browse the repository at this point in the history
When looking for the Aggref to determine whether partial or full
aggregation is used get_aggsplit only checked for top-level Aggrefs
in the targetlist. So a targetlist where all Aggrefs where nested
in other expressions would lead to a crash. This function also
only looked for the Aggref in the targetlist but in a query with
a HAVING clause the aggregate might not be in the targetlist if
it is only referenced in the HAVING clause.

Fixes #3664
Fixes #3672
  • Loading branch information
svenklemm committed Oct 16, 2021
1 parent 57744c8 commit 1bbaf66
Show file tree
Hide file tree
Showing 6 changed files with 243 additions and 18 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,11 @@ accidentally triggering the load of a previous DB version.**
* #3580 Fix memory context bug executing TRUNCATE
* #3654 Fix index attnum mapping in reorder_chunk
* #3661 Fix SkipScan path generation with constant DISTINCT column
* #3708 Fix crash in get_aggsplit

**Thanks**
* @binakot and @sebvett for reporting an issue with DISTINCT queries
* @phemmer for reporting an issue with aggregate queries on multinode

**Thanks**
* @hardikm10, @DavidPavlicek and @pafiti for reporting bugs on TRUNCATE
Expand Down
50 changes: 32 additions & 18 deletions tsl/src/fdw/estimate.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
*/
#include <postgres.h>
#include <nodes/nodes.h>
#include <nodes/nodeFuncs.h>
#include <optimizer/cost.h>
#include <optimizer/clauses.h>
#include <optimizer/prep.h>
Expand Down Expand Up @@ -38,33 +39,46 @@ typedef struct CostEstimate
Cost run_cost;
} CostEstimate;

static bool
aggref_walker(Node *node, Aggref **aggref)
{
if (node == NULL)
return false;

if (IsA(node, Aggref))
{
*aggref = castNode(Aggref, node);
return true;
}

return expression_tree_walker(node, aggref_walker, aggref);
}

/*
* Get the AggsSplit mode of a relation.
*
* The AggSplit (partial or full aggregation) affects costing.
* All aggregates to compute this relation must have the same
* mode, so we only check mode on first match.
*/
static AggSplit
get_aggsplit(RelOptInfo *rel)
get_aggsplit(PlannerInfo *root, RelOptInfo *rel)
{
ListCell *lc;
Aggref *agg;
Assert(root->parse->hasAggs);

foreach (lc, rel->reltarget->exprs)
{
Node *expr = lfirst(lc);
if (aggref_walker((Node *) rel->reltarget->exprs, &agg))
return agg->aggsplit;

if (IsA(expr, Aggref))
{
/* All aggregates to compute this relation must have the same
* mode, so we can return here. */
return castNode(Aggref, expr)->aggsplit;
}
}
/* Assume AGGSPLIT_SIMPLE (non-partial) aggregate. We really shouldn't
* get there though if this function is called on upper rel with an
* aggregate. */
pg_unreachable();
/* If the aggregate is only referenced in the HAVING clause it will
* not be present in the targetlist so we check HAVING clause too. */
if (root->hasHavingQual && aggref_walker((Node *) root->parse->havingQual, &agg))
return agg->aggsplit;

return AGGSPLIT_SIMPLE;
/* Since PlannerInfo has hasAggs true (checked in caller) we should
* never get here and always find an Aggref. */
elog(ERROR, "no aggref found in targetlist");
pg_unreachable();
}

static void
Expand Down Expand Up @@ -102,7 +116,7 @@ get_upper_rel_estimate(PlannerInfo *root, RelOptInfo *rel, CostEstimate *ce)
{
/* Get the aggsplit to use in order to support push-down of partial
* aggregation */
AggSplit aggsplit = get_aggsplit(rel);
AggSplit aggsplit = get_aggsplit(root, rel);

get_agg_clause_costs_compat(root, (Node *) fpinfo->grouped_tlist, aggsplit, &aggcosts);
}
Expand Down
67 changes: 67 additions & 0 deletions tsl/test/expected/dist_partial_agg-12.out
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,38 @@ SET enable_partitionwise_aggregate = ON;
Remote SQL: SELECT timec, location, temperature, humidity, allnull, highlow, bit_int, good_life FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) ORDER BY location ASC NULLS LAST, timec ASC NULLS LAST
(26 rows)

-- Aggregates nested in expressions and no top-level aggregate #3672
:PREFIX SELECT :GROUPING,
sum(temperature)+sum(humidity) as agg_sum_expr
FROM :TEST_TABLE
GROUP BY :GROUPING, timec
ORDER BY :GROUPING, timec;
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Custom Scan (AsyncAppend)
Output: location, ((sum(temperature) + sum(humidity))), timec
-> Merge Append
Sort Key: conditions.location, conditions.timec
-> Custom Scan (DataNodeScan)
Output: conditions.location, ((sum(conditions.temperature) + sum(conditions.humidity))), conditions.timec
Relations: Aggregate on (public.conditions)
Data node: data_node_1
Chunks: _dist_hyper_1_1_chunk, _dist_hyper_1_2_chunk, _dist_hyper_1_3_chunk, _dist_hyper_1_4_chunk
Remote SQL: SELECT location, (sum(temperature) + sum(humidity)), timec FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 3 ORDER BY location ASC NULLS LAST, timec ASC NULLS LAST
-> Custom Scan (DataNodeScan)
Output: conditions_1.location, ((sum(conditions_1.temperature) + sum(conditions_1.humidity))), conditions_1.timec
Relations: Aggregate on (public.conditions)
Data node: data_node_2
Chunks: _dist_hyper_1_9_chunk, _dist_hyper_1_10_chunk, _dist_hyper_1_11_chunk, _dist_hyper_1_12_chunk
Remote SQL: SELECT location, (sum(temperature) + sum(humidity)), timec FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 3 ORDER BY location ASC NULLS LAST, timec ASC NULLS LAST
-> Custom Scan (DataNodeScan)
Output: conditions_2.location, ((sum(conditions_2.temperature) + sum(conditions_2.humidity))), conditions_2.timec
Relations: Aggregate on (public.conditions)
Data node: data_node_3
Chunks: _dist_hyper_1_5_chunk, _dist_hyper_1_6_chunk, _dist_hyper_1_7_chunk, _dist_hyper_1_8_chunk
Remote SQL: SELECT location, (sum(temperature) + sum(humidity)), timec FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 3 ORDER BY location ASC NULLS LAST, timec ASC NULLS LAST
(22 rows)

\set GROUPING 'region, temperature'
\ir 'include/aggregate_queries.sql'
-- This file and its contents are licensed under the Timescale License.
Expand Down Expand Up @@ -438,6 +470,41 @@ SET enable_partitionwise_aggregate = ON;
Remote SQL: SELECT timec, region, temperature, humidity, allnull, highlow, bit_int, good_life FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) ORDER BY region ASC NULLS LAST, temperature ASC NULLS LAST, timec ASC NULLS LAST
(29 rows)

-- Aggregates nested in expressions and no top-level aggregate #3672
:PREFIX SELECT :GROUPING,
sum(temperature)+sum(humidity) as agg_sum_expr
FROM :TEST_TABLE
GROUP BY :GROUPING, timec
ORDER BY :GROUPING, timec;
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Finalize GroupAggregate
Output: region, temperature, (sum(temperature) + sum(humidity)), timec
Group Key: region, temperature, timec
-> Custom Scan (AsyncAppend)
Output: region, temperature, timec, (PARTIAL sum(temperature)), (PARTIAL sum(humidity))
-> Merge Append
Sort Key: conditions.region, conditions.temperature, conditions.timec
-> Custom Scan (DataNodeScan)
Output: conditions.region, conditions.temperature, conditions.timec, (PARTIAL sum(conditions.temperature)), (PARTIAL sum(conditions.humidity))
Relations: Aggregate on (public.conditions)
Data node: data_node_1
Chunks: _dist_hyper_1_1_chunk, _dist_hyper_1_2_chunk, _dist_hyper_1_3_chunk, _dist_hyper_1_4_chunk
Remote SQL: SELECT region, temperature, timec, _timescaledb_internal.partialize_agg(sum(temperature)), _timescaledb_internal.partialize_agg(sum(humidity)) FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 2, 3 ORDER BY region ASC NULLS LAST, temperature ASC NULLS LAST, timec ASC NULLS LAST
-> Custom Scan (DataNodeScan)
Output: conditions_1.region, conditions_1.temperature, conditions_1.timec, (PARTIAL sum(conditions_1.temperature)), (PARTIAL sum(conditions_1.humidity))
Relations: Aggregate on (public.conditions)
Data node: data_node_2
Chunks: _dist_hyper_1_9_chunk, _dist_hyper_1_10_chunk, _dist_hyper_1_11_chunk, _dist_hyper_1_12_chunk
Remote SQL: SELECT region, temperature, timec, _timescaledb_internal.partialize_agg(sum(temperature)), _timescaledb_internal.partialize_agg(sum(humidity)) FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 2, 3 ORDER BY region ASC NULLS LAST, temperature ASC NULLS LAST, timec ASC NULLS LAST
-> Custom Scan (DataNodeScan)
Output: conditions_2.region, conditions_2.temperature, conditions_2.timec, (PARTIAL sum(conditions_2.temperature)), (PARTIAL sum(conditions_2.humidity))
Relations: Aggregate on (public.conditions)
Data node: data_node_3
Chunks: _dist_hyper_1_5_chunk, _dist_hyper_1_6_chunk, _dist_hyper_1_7_chunk, _dist_hyper_1_8_chunk
Remote SQL: SELECT region, temperature, timec, _timescaledb_internal.partialize_agg(sum(temperature)), _timescaledb_internal.partialize_agg(sum(humidity)) FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 2, 3 ORDER BY region ASC NULLS LAST, temperature ASC NULLS LAST, timec ASC NULLS LAST
(25 rows)

-- Full aggregate pushdown correctness check, compare location grouped query results with partionwise aggregates on and off
\set GROUPING 'location'
SELECT format('%s/results/dist_agg_loc_results_test.out', :'TEST_OUTPUT_DIR') as "RESULTS_TEST1",
Expand Down
67 changes: 67 additions & 0 deletions tsl/test/expected/dist_partial_agg-13.out
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,38 @@ SET enable_partitionwise_aggregate = ON;
Remote SQL: SELECT timec, location, temperature, humidity, allnull, highlow, bit_int, good_life FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) ORDER BY location ASC NULLS LAST, timec ASC NULLS LAST
(26 rows)

-- Aggregates nested in expressions and no top-level aggregate #3672
:PREFIX SELECT :GROUPING,
sum(temperature)+sum(humidity) as agg_sum_expr
FROM :TEST_TABLE
GROUP BY :GROUPING, timec
ORDER BY :GROUPING, timec;
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Custom Scan (AsyncAppend)
Output: location, ((sum(temperature) + sum(humidity))), timec
-> Merge Append
Sort Key: conditions.location, conditions.timec
-> Custom Scan (DataNodeScan)
Output: conditions.location, ((sum(conditions.temperature) + sum(conditions.humidity))), conditions.timec
Relations: Aggregate on (public.conditions)
Data node: data_node_1
Chunks: _dist_hyper_1_1_chunk, _dist_hyper_1_2_chunk, _dist_hyper_1_3_chunk, _dist_hyper_1_4_chunk
Remote SQL: SELECT location, (sum(temperature) + sum(humidity)), timec FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 3 ORDER BY location ASC NULLS LAST, timec ASC NULLS LAST
-> Custom Scan (DataNodeScan)
Output: conditions_1.location, ((sum(conditions_1.temperature) + sum(conditions_1.humidity))), conditions_1.timec
Relations: Aggregate on (public.conditions)
Data node: data_node_2
Chunks: _dist_hyper_1_9_chunk, _dist_hyper_1_10_chunk, _dist_hyper_1_11_chunk, _dist_hyper_1_12_chunk
Remote SQL: SELECT location, (sum(temperature) + sum(humidity)), timec FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 3 ORDER BY location ASC NULLS LAST, timec ASC NULLS LAST
-> Custom Scan (DataNodeScan)
Output: conditions_2.location, ((sum(conditions_2.temperature) + sum(conditions_2.humidity))), conditions_2.timec
Relations: Aggregate on (public.conditions)
Data node: data_node_3
Chunks: _dist_hyper_1_5_chunk, _dist_hyper_1_6_chunk, _dist_hyper_1_7_chunk, _dist_hyper_1_8_chunk
Remote SQL: SELECT location, (sum(temperature) + sum(humidity)), timec FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 3 ORDER BY location ASC NULLS LAST, timec ASC NULLS LAST
(22 rows)

\set GROUPING 'region, temperature'
\ir 'include/aggregate_queries.sql'
-- This file and its contents are licensed under the Timescale License.
Expand Down Expand Up @@ -438,6 +470,41 @@ SET enable_partitionwise_aggregate = ON;
Remote SQL: SELECT timec, region, temperature, humidity, allnull, highlow, bit_int, good_life FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) ORDER BY region ASC NULLS LAST, temperature ASC NULLS LAST, timec ASC NULLS LAST
(29 rows)

-- Aggregates nested in expressions and no top-level aggregate #3672
:PREFIX SELECT :GROUPING,
sum(temperature)+sum(humidity) as agg_sum_expr
FROM :TEST_TABLE
GROUP BY :GROUPING, timec
ORDER BY :GROUPING, timec;
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Finalize GroupAggregate
Output: region, temperature, (sum(temperature) + sum(humidity)), timec
Group Key: region, temperature, timec
-> Custom Scan (AsyncAppend)
Output: region, temperature, timec, (PARTIAL sum(temperature)), (PARTIAL sum(humidity))
-> Merge Append
Sort Key: conditions.region, conditions.temperature, conditions.timec
-> Custom Scan (DataNodeScan)
Output: conditions.region, conditions.temperature, conditions.timec, (PARTIAL sum(conditions.temperature)), (PARTIAL sum(conditions.humidity))
Relations: Aggregate on (public.conditions)
Data node: data_node_1
Chunks: _dist_hyper_1_1_chunk, _dist_hyper_1_2_chunk, _dist_hyper_1_3_chunk, _dist_hyper_1_4_chunk
Remote SQL: SELECT region, temperature, timec, _timescaledb_internal.partialize_agg(sum(temperature)), _timescaledb_internal.partialize_agg(sum(humidity)) FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 2, 3 ORDER BY region ASC NULLS LAST, temperature ASC NULLS LAST, timec ASC NULLS LAST
-> Custom Scan (DataNodeScan)
Output: conditions_1.region, conditions_1.temperature, conditions_1.timec, (PARTIAL sum(conditions_1.temperature)), (PARTIAL sum(conditions_1.humidity))
Relations: Aggregate on (public.conditions)
Data node: data_node_2
Chunks: _dist_hyper_1_9_chunk, _dist_hyper_1_10_chunk, _dist_hyper_1_11_chunk, _dist_hyper_1_12_chunk
Remote SQL: SELECT region, temperature, timec, _timescaledb_internal.partialize_agg(sum(temperature)), _timescaledb_internal.partialize_agg(sum(humidity)) FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 2, 3 ORDER BY region ASC NULLS LAST, temperature ASC NULLS LAST, timec ASC NULLS LAST
-> Custom Scan (DataNodeScan)
Output: conditions_2.region, conditions_2.temperature, conditions_2.timec, (PARTIAL sum(conditions_2.temperature)), (PARTIAL sum(conditions_2.humidity))
Relations: Aggregate on (public.conditions)
Data node: data_node_3
Chunks: _dist_hyper_1_5_chunk, _dist_hyper_1_6_chunk, _dist_hyper_1_7_chunk, _dist_hyper_1_8_chunk
Remote SQL: SELECT region, temperature, timec, _timescaledb_internal.partialize_agg(sum(temperature)), _timescaledb_internal.partialize_agg(sum(humidity)) FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 2, 3 ORDER BY region ASC NULLS LAST, temperature ASC NULLS LAST, timec ASC NULLS LAST
(25 rows)

-- Full aggregate pushdown correctness check, compare location grouped query results with partionwise aggregates on and off
\set GROUPING 'location'
SELECT format('%s/results/dist_agg_loc_results_test.out', :'TEST_OUTPUT_DIR') as "RESULTS_TEST1",
Expand Down
Loading

0 comments on commit 1bbaf66

Please sign in to comment.