Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix crash in get_aggsplit #3708

Merged
merged 1 commit into from
Oct 18, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@ accidentally triggering the load of a previous DB version.**
* #3580 Fix memory context bug executing TRUNCATE
* #3654 Fix index attnum mapping in reorder_chunk
* #3661 Fix SkipScan path generation with constant DISTINCT column
* #3708 Fix crash in get_aggsplit

**Thanks**
* @binakot and @sebvett for reporting an issue with DISTINCT queries

**Thanks**
* @hardikm10, @DavidPavlicek and @pafiti for reporting bugs on TRUNCATE
* @phemmer for reporting an issue with aggregate queries on multinode

## 2.4.2 (2021-09-21)

Expand Down
50 changes: 32 additions & 18 deletions tsl/src/fdw/estimate.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
*/
#include <postgres.h>
#include <nodes/nodes.h>
#include <nodes/nodeFuncs.h>
#include <optimizer/cost.h>
#include <optimizer/clauses.h>
#include <optimizer/prep.h>
Expand Down Expand Up @@ -38,33 +39,46 @@ typedef struct CostEstimate
Cost run_cost;
} CostEstimate;

static bool
find_first_aggref_walker(Node *node, Aggref **aggref)
{
if (node == NULL)
return false;

if (IsA(node, Aggref))
{
*aggref = castNode(Aggref, node);
return true;
}

return expression_tree_walker(node, find_first_aggref_walker, aggref);
}

/*
* Get the AggsSplit mode of a relation.
*
* The AggSplit (partial or full aggregation) affects costing.
* All aggregates to compute this relation must have the same
* mode, so we only check mode on first match.
*/
static AggSplit
get_aggsplit(RelOptInfo *rel)
get_aggsplit(PlannerInfo *root, RelOptInfo *rel)
{
ListCell *lc;
Aggref *agg;
Assert(root->parse->hasAggs);

foreach (lc, rel->reltarget->exprs)
{
Node *expr = lfirst(lc);
if (find_first_aggref_walker((Node *) rel->reltarget->exprs, &agg))
return agg->aggsplit;

if (IsA(expr, Aggref))
{
/* All aggregates to compute this relation must have the same
* mode, so we can return here. */
return castNode(Aggref, expr)->aggsplit;
}
}
/* Assume AGGSPLIT_SIMPLE (non-partial) aggregate. We really shouldn't
* get there though if this function is called on upper rel with an
* aggregate. */
pg_unreachable();
/* If the aggregate is only referenced in the HAVING clause it will
* not be present in the targetlist so we have to check HAVING clause too. */
if (root->hasHavingQual && find_first_aggref_walker((Node *) root->parse->havingQual, &agg))
return agg->aggsplit;

return AGGSPLIT_SIMPLE;
/* Since PlannerInfo has hasAggs true (checked in caller) we should
* never get here and always find an Aggref. */
elog(ERROR, "no aggref found in targetlist or HAVING clause");
pg_unreachable();
}

static void
Expand Down Expand Up @@ -102,7 +116,7 @@ get_upper_rel_estimate(PlannerInfo *root, RelOptInfo *rel, CostEstimate *ce)
{
/* Get the aggsplit to use in order to support push-down of partial
* aggregation */
AggSplit aggsplit = get_aggsplit(rel);
AggSplit aggsplit = get_aggsplit(root, rel);

get_agg_clause_costs_compat(root, (Node *) fpinfo->grouped_tlist, aggsplit, &aggcosts);
}
Expand Down
137 changes: 137 additions & 0 deletions tsl/test/expected/dist_partial_agg-12.out
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,72 @@ SET enable_partitionwise_aggregate = ON;
Remote SQL: SELECT timec, location, temperature, humidity, allnull, highlow, bit_int, good_life FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) ORDER BY location ASC NULLS LAST, timec ASC NULLS LAST
(26 rows)

-- Aggregates nested in expressions and no top-level aggregate #3672
:PREFIX SELECT :GROUPING,
sum(temperature)+sum(humidity) as agg_sum_expr
FROM :TEST_TABLE
GROUP BY :GROUPING, timec
ORDER BY :GROUPING, timec;
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Custom Scan (AsyncAppend)
Output: location, ((sum(temperature) + sum(humidity))), timec
-> Merge Append
Sort Key: conditions.location, conditions.timec
-> Custom Scan (DataNodeScan)
Output: conditions.location, ((sum(conditions.temperature) + sum(conditions.humidity))), conditions.timec
Relations: Aggregate on (public.conditions)
Data node: data_node_1
Chunks: _dist_hyper_1_1_chunk, _dist_hyper_1_2_chunk, _dist_hyper_1_3_chunk, _dist_hyper_1_4_chunk
Remote SQL: SELECT location, (sum(temperature) + sum(humidity)), timec FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 3 ORDER BY location ASC NULLS LAST, timec ASC NULLS LAST
-> Custom Scan (DataNodeScan)
Output: conditions_1.location, ((sum(conditions_1.temperature) + sum(conditions_1.humidity))), conditions_1.timec
Relations: Aggregate on (public.conditions)
Data node: data_node_2
Chunks: _dist_hyper_1_9_chunk, _dist_hyper_1_10_chunk, _dist_hyper_1_11_chunk, _dist_hyper_1_12_chunk
Remote SQL: SELECT location, (sum(temperature) + sum(humidity)), timec FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 3 ORDER BY location ASC NULLS LAST, timec ASC NULLS LAST
-> Custom Scan (DataNodeScan)
Output: conditions_2.location, ((sum(conditions_2.temperature) + sum(conditions_2.humidity))), conditions_2.timec
Relations: Aggregate on (public.conditions)
Data node: data_node_3
Chunks: _dist_hyper_1_5_chunk, _dist_hyper_1_6_chunk, _dist_hyper_1_7_chunk, _dist_hyper_1_8_chunk
Remote SQL: SELECT location, (sum(temperature) + sum(humidity)), timec FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 3 ORDER BY location ASC NULLS LAST, timec ASC NULLS LAST
(22 rows)

-- Aggregates with no aggregate reference in targetlist #3664
:PREFIX SELECT :GROUPING
FROM :TEST_TABLE
GROUP BY :GROUPING, timec
HAVING avg(temperature) > 20
ORDER BY :GROUPING, timec;
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Sort
Output: location, timec
Sort Key: location, timec
-> Custom Scan (AsyncAppend)
Output: location, timec
-> Append
-> Custom Scan (DataNodeScan)
Output: conditions.location, conditions.timec
Relations: Aggregate on (public.conditions)
Data node: data_node_1
Chunks: _dist_hyper_1_1_chunk, _dist_hyper_1_2_chunk, _dist_hyper_1_3_chunk, _dist_hyper_1_4_chunk
Remote SQL: SELECT location, timec FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 2 HAVING ((avg(temperature) > 20::double precision))
-> Custom Scan (DataNodeScan)
Output: conditions_1.location, conditions_1.timec
Relations: Aggregate on (public.conditions)
Data node: data_node_2
Chunks: _dist_hyper_1_9_chunk, _dist_hyper_1_10_chunk, _dist_hyper_1_11_chunk, _dist_hyper_1_12_chunk
Remote SQL: SELECT location, timec FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 2 HAVING ((avg(temperature) > 20::double precision))
-> Custom Scan (DataNodeScan)
Output: conditions_2.location, conditions_2.timec
Relations: Aggregate on (public.conditions)
Data node: data_node_3
Chunks: _dist_hyper_1_5_chunk, _dist_hyper_1_6_chunk, _dist_hyper_1_7_chunk, _dist_hyper_1_8_chunk
Remote SQL: SELECT location, timec FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 2 HAVING ((avg(temperature) > 20::double precision))
(24 rows)

\set GROUPING 'region, temperature'
\ir 'include/aggregate_queries.sql'
-- This file and its contents are licensed under the Timescale License.
Expand Down Expand Up @@ -438,6 +504,77 @@ SET enable_partitionwise_aggregate = ON;
Remote SQL: SELECT timec, region, temperature, humidity, allnull, highlow, bit_int, good_life FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) ORDER BY region ASC NULLS LAST, temperature ASC NULLS LAST, timec ASC NULLS LAST
(29 rows)

-- Aggregates nested in expressions and no top-level aggregate #3672
:PREFIX SELECT :GROUPING,
sum(temperature)+sum(humidity) as agg_sum_expr
FROM :TEST_TABLE
GROUP BY :GROUPING, timec
ORDER BY :GROUPING, timec;
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Finalize GroupAggregate
Output: region, temperature, (sum(temperature) + sum(humidity)), timec
Group Key: region, temperature, timec
-> Custom Scan (AsyncAppend)
Output: region, temperature, timec, (PARTIAL sum(temperature)), (PARTIAL sum(humidity))
-> Merge Append
Sort Key: conditions.region, conditions.temperature, conditions.timec
-> Custom Scan (DataNodeScan)
Output: conditions.region, conditions.temperature, conditions.timec, (PARTIAL sum(conditions.temperature)), (PARTIAL sum(conditions.humidity))
Relations: Aggregate on (public.conditions)
Data node: data_node_1
Chunks: _dist_hyper_1_1_chunk, _dist_hyper_1_2_chunk, _dist_hyper_1_3_chunk, _dist_hyper_1_4_chunk
Remote SQL: SELECT region, temperature, timec, _timescaledb_internal.partialize_agg(sum(temperature)), _timescaledb_internal.partialize_agg(sum(humidity)) FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 2, 3 ORDER BY region ASC NULLS LAST, temperature ASC NULLS LAST, timec ASC NULLS LAST
-> Custom Scan (DataNodeScan)
Output: conditions_1.region, conditions_1.temperature, conditions_1.timec, (PARTIAL sum(conditions_1.temperature)), (PARTIAL sum(conditions_1.humidity))
Relations: Aggregate on (public.conditions)
Data node: data_node_2
Chunks: _dist_hyper_1_9_chunk, _dist_hyper_1_10_chunk, _dist_hyper_1_11_chunk, _dist_hyper_1_12_chunk
Remote SQL: SELECT region, temperature, timec, _timescaledb_internal.partialize_agg(sum(temperature)), _timescaledb_internal.partialize_agg(sum(humidity)) FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 2, 3 ORDER BY region ASC NULLS LAST, temperature ASC NULLS LAST, timec ASC NULLS LAST
-> Custom Scan (DataNodeScan)
Output: conditions_2.region, conditions_2.temperature, conditions_2.timec, (PARTIAL sum(conditions_2.temperature)), (PARTIAL sum(conditions_2.humidity))
Relations: Aggregate on (public.conditions)
Data node: data_node_3
Chunks: _dist_hyper_1_5_chunk, _dist_hyper_1_6_chunk, _dist_hyper_1_7_chunk, _dist_hyper_1_8_chunk
Remote SQL: SELECT region, temperature, timec, _timescaledb_internal.partialize_agg(sum(temperature)), _timescaledb_internal.partialize_agg(sum(humidity)) FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 2, 3 ORDER BY region ASC NULLS LAST, temperature ASC NULLS LAST, timec ASC NULLS LAST
(25 rows)

-- Aggregates with no aggregate reference in targetlist #3664
:PREFIX SELECT :GROUPING
FROM :TEST_TABLE
GROUP BY :GROUPING, timec
HAVING avg(temperature) > 20
ORDER BY :GROUPING, timec;
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Finalize GroupAggregate
Output: region, temperature, timec
Group Key: region, temperature, timec
Filter: (avg(temperature) > '20'::double precision)
-> Custom Scan (AsyncAppend)
Output: region, temperature, timec, (PARTIAL avg(temperature))
-> Merge Append
Sort Key: conditions.region, conditions.temperature, conditions.timec
-> Custom Scan (DataNodeScan)
Output: conditions.region, conditions.temperature, conditions.timec, (PARTIAL avg(conditions.temperature))
Relations: Aggregate on (public.conditions)
Data node: data_node_1
Chunks: _dist_hyper_1_1_chunk, _dist_hyper_1_2_chunk, _dist_hyper_1_3_chunk, _dist_hyper_1_4_chunk
Remote SQL: SELECT region, temperature, timec, _timescaledb_internal.partialize_agg(avg(temperature)) FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 2, 3 ORDER BY region ASC NULLS LAST, temperature ASC NULLS LAST, timec ASC NULLS LAST
-> Custom Scan (DataNodeScan)
Output: conditions_1.region, conditions_1.temperature, conditions_1.timec, (PARTIAL avg(conditions_1.temperature))
Relations: Aggregate on (public.conditions)
Data node: data_node_2
Chunks: _dist_hyper_1_9_chunk, _dist_hyper_1_10_chunk, _dist_hyper_1_11_chunk, _dist_hyper_1_12_chunk
Remote SQL: SELECT region, temperature, timec, _timescaledb_internal.partialize_agg(avg(temperature)) FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 2, 3 ORDER BY region ASC NULLS LAST, temperature ASC NULLS LAST, timec ASC NULLS LAST
-> Custom Scan (DataNodeScan)
Output: conditions_2.region, conditions_2.temperature, conditions_2.timec, (PARTIAL avg(conditions_2.temperature))
Relations: Aggregate on (public.conditions)
Data node: data_node_3
Chunks: _dist_hyper_1_5_chunk, _dist_hyper_1_6_chunk, _dist_hyper_1_7_chunk, _dist_hyper_1_8_chunk
Remote SQL: SELECT region, temperature, timec, _timescaledb_internal.partialize_agg(avg(temperature)) FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 2, 3 ORDER BY region ASC NULLS LAST, temperature ASC NULLS LAST, timec ASC NULLS LAST
(26 rows)

-- Full aggregate pushdown correctness check, compare location grouped query results with partionwise aggregates on and off
\set GROUPING 'location'
SELECT format('%s/results/dist_agg_loc_results_test.out', :'TEST_OUTPUT_DIR') as "RESULTS_TEST1",
Expand Down
Loading