From 1d59577f9dad42e609fa53a0a35546707e7fec75 Mon Sep 17 00:00:00 2001 From: Sven Klemm Date: Sat, 16 Oct 2021 23:55:48 +0200 Subject: [PATCH] Fix crash in get_aggsplit When looking for the Aggref to determine whether partial or full aggregation is used get_aggsplit only checked for top-level Aggrefs in the targetlist. So a targetlist where all Aggrefs where nested in other expressions would lead to a crash. This function also only looked for the Aggref in the targetlist but in a query with a HAVING clause the aggregate might not be in the targetlist if it is only referenced in the HAVING clause. Fixes #3664 Fixes #3672 --- CHANGELOG.md | 4 +- tsl/src/fdw/estimate.c | 50 +++++--- tsl/test/expected/dist_partial_agg-12.out | 137 +++++++++++++++++++++ tsl/test/expected/dist_partial_agg-13.out | 137 +++++++++++++++++++++ tsl/test/expected/dist_partial_agg-14.out | 137 +++++++++++++++++++++ tsl/test/sql/include/aggregate_queries.sql | 15 +++ 6 files changed, 460 insertions(+), 20 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 91a01f26409..c7ed0283921 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,12 +10,12 @@ accidentally triggering the load of a previous DB version.** * #3580 Fix memory context bug executing TRUNCATE * #3654 Fix index attnum mapping in reorder_chunk * #3661 Fix SkipScan path generation with constant DISTINCT column +* #3708 Fix crash in get_aggsplit **Thanks** * @binakot and @sebvett for reporting an issue with DISTINCT queries - -**Thanks** * @hardikm10, @DavidPavlicek and @pafiti for reporting bugs on TRUNCATE +* @phemmer for reporting an issue with aggregate queries on multinode ## 2.4.2 (2021-09-21) diff --git a/tsl/src/fdw/estimate.c b/tsl/src/fdw/estimate.c index e8c4d9f83d5..722e021f382 100644 --- a/tsl/src/fdw/estimate.c +++ b/tsl/src/fdw/estimate.c @@ -5,6 +5,7 @@ */ #include #include +#include #include #include #include @@ -38,33 +39,46 @@ typedef struct CostEstimate Cost run_cost; } CostEstimate; +static bool +find_first_aggref_walker(Node *node, Aggref **aggref) +{ + if (node == NULL) + return false; + + if (IsA(node, Aggref)) + { + *aggref = castNode(Aggref, node); + return true; + } + + return expression_tree_walker(node, find_first_aggref_walker, aggref); +} + /* * Get the AggsSplit mode of a relation. * * The AggSplit (partial or full aggregation) affects costing. + * All aggregates to compute this relation must have the same + * mode, so we only check mode on first match. */ static AggSplit -get_aggsplit(RelOptInfo *rel) +get_aggsplit(PlannerInfo *root, RelOptInfo *rel) { - ListCell *lc; + Aggref *agg; + Assert(root->parse->hasAggs); - foreach (lc, rel->reltarget->exprs) - { - Node *expr = lfirst(lc); + if (find_first_aggref_walker((Node *) rel->reltarget->exprs, &agg)) + return agg->aggsplit; - if (IsA(expr, Aggref)) - { - /* All aggregates to compute this relation must have the same - * mode, so we can return here. */ - return castNode(Aggref, expr)->aggsplit; - } - } - /* Assume AGGSPLIT_SIMPLE (non-partial) aggregate. We really shouldn't - * get there though if this function is called on upper rel with an - * aggregate. */ - pg_unreachable(); + /* If the aggregate is only referenced in the HAVING clause it will + * not be present in the targetlist so we have to check HAVING clause too. */ + if (root->hasHavingQual && find_first_aggref_walker((Node *) root->parse->havingQual, &agg)) + return agg->aggsplit; - return AGGSPLIT_SIMPLE; + /* Since PlannerInfo has hasAggs true (checked in caller) we should + * never get here and always find an Aggref. */ + elog(ERROR, "no aggref found in targetlist or HAVING clause"); + pg_unreachable(); } static void @@ -102,7 +116,7 @@ get_upper_rel_estimate(PlannerInfo *root, RelOptInfo *rel, CostEstimate *ce) { /* Get the aggsplit to use in order to support push-down of partial * aggregation */ - AggSplit aggsplit = get_aggsplit(rel); + AggSplit aggsplit = get_aggsplit(root, rel); get_agg_clause_costs_compat(root, (Node *) fpinfo->grouped_tlist, aggsplit, &aggcosts); } diff --git a/tsl/test/expected/dist_partial_agg-12.out b/tsl/test/expected/dist_partial_agg-12.out index 4b273cc043e..3ad739b036b 100644 --- a/tsl/test/expected/dist_partial_agg-12.out +++ b/tsl/test/expected/dist_partial_agg-12.out @@ -276,6 +276,72 @@ SET enable_partitionwise_aggregate = ON; Remote SQL: SELECT timec, location, temperature, humidity, allnull, highlow, bit_int, good_life FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) ORDER BY location ASC NULLS LAST, timec ASC NULLS LAST (26 rows) +-- Aggregates nested in expressions and no top-level aggregate #3672 +:PREFIX SELECT :GROUPING, + sum(temperature)+sum(humidity) as agg_sum_expr + FROM :TEST_TABLE + GROUP BY :GROUPING, timec + ORDER BY :GROUPING, timec; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Custom Scan (AsyncAppend) + Output: location, ((sum(temperature) + sum(humidity))), timec + -> Merge Append + Sort Key: conditions.location, conditions.timec + -> Custom Scan (DataNodeScan) + Output: conditions.location, ((sum(conditions.temperature) + sum(conditions.humidity))), conditions.timec + Relations: Aggregate on (public.conditions) + Data node: data_node_1 + Chunks: _dist_hyper_1_1_chunk, _dist_hyper_1_2_chunk, _dist_hyper_1_3_chunk, _dist_hyper_1_4_chunk + Remote SQL: SELECT location, (sum(temperature) + sum(humidity)), timec FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 3 ORDER BY location ASC NULLS LAST, timec ASC NULLS LAST + -> Custom Scan (DataNodeScan) + Output: conditions_1.location, ((sum(conditions_1.temperature) + sum(conditions_1.humidity))), conditions_1.timec + Relations: Aggregate on (public.conditions) + Data node: data_node_2 + Chunks: _dist_hyper_1_9_chunk, _dist_hyper_1_10_chunk, _dist_hyper_1_11_chunk, _dist_hyper_1_12_chunk + Remote SQL: SELECT location, (sum(temperature) + sum(humidity)), timec FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 3 ORDER BY location ASC NULLS LAST, timec ASC NULLS LAST + -> Custom Scan (DataNodeScan) + Output: conditions_2.location, ((sum(conditions_2.temperature) + sum(conditions_2.humidity))), conditions_2.timec + Relations: Aggregate on (public.conditions) + Data node: data_node_3 + Chunks: _dist_hyper_1_5_chunk, _dist_hyper_1_6_chunk, _dist_hyper_1_7_chunk, _dist_hyper_1_8_chunk + Remote SQL: SELECT location, (sum(temperature) + sum(humidity)), timec FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 3 ORDER BY location ASC NULLS LAST, timec ASC NULLS LAST +(22 rows) + +-- Aggregates with no aggregate reference in targetlist #3664 +:PREFIX SELECT :GROUPING + FROM :TEST_TABLE + GROUP BY :GROUPING, timec + HAVING avg(temperature) > 20 + ORDER BY :GROUPING, timec; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + Sort + Output: location, timec + Sort Key: location, timec + -> Custom Scan (AsyncAppend) + Output: location, timec + -> Append + -> Custom Scan (DataNodeScan) + Output: conditions.location, conditions.timec + Relations: Aggregate on (public.conditions) + Data node: data_node_1 + Chunks: _dist_hyper_1_1_chunk, _dist_hyper_1_2_chunk, _dist_hyper_1_3_chunk, _dist_hyper_1_4_chunk + Remote SQL: SELECT location, timec FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 2 HAVING ((avg(temperature) > 20::double precision)) + -> Custom Scan (DataNodeScan) + Output: conditions_1.location, conditions_1.timec + Relations: Aggregate on (public.conditions) + Data node: data_node_2 + Chunks: _dist_hyper_1_9_chunk, _dist_hyper_1_10_chunk, _dist_hyper_1_11_chunk, _dist_hyper_1_12_chunk + Remote SQL: SELECT location, timec FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 2 HAVING ((avg(temperature) > 20::double precision)) + -> Custom Scan (DataNodeScan) + Output: conditions_2.location, conditions_2.timec + Relations: Aggregate on (public.conditions) + Data node: data_node_3 + Chunks: _dist_hyper_1_5_chunk, _dist_hyper_1_6_chunk, _dist_hyper_1_7_chunk, _dist_hyper_1_8_chunk + Remote SQL: SELECT location, timec FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 2 HAVING ((avg(temperature) > 20::double precision)) +(24 rows) + \set GROUPING 'region, temperature' \ir 'include/aggregate_queries.sql' -- This file and its contents are licensed under the Timescale License. @@ -438,6 +504,77 @@ SET enable_partitionwise_aggregate = ON; Remote SQL: SELECT timec, region, temperature, humidity, allnull, highlow, bit_int, good_life FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) ORDER BY region ASC NULLS LAST, temperature ASC NULLS LAST, timec ASC NULLS LAST (29 rows) +-- Aggregates nested in expressions and no top-level aggregate #3672 +:PREFIX SELECT :GROUPING, + sum(temperature)+sum(humidity) as agg_sum_expr + FROM :TEST_TABLE + GROUP BY :GROUPING, timec + ORDER BY :GROUPING, timec; + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Finalize GroupAggregate + Output: region, temperature, (sum(temperature) + sum(humidity)), timec + Group Key: region, temperature, timec + -> Custom Scan (AsyncAppend) + Output: region, temperature, timec, (PARTIAL sum(temperature)), (PARTIAL sum(humidity)) + -> Merge Append + Sort Key: conditions.region, conditions.temperature, conditions.timec + -> Custom Scan (DataNodeScan) + Output: conditions.region, conditions.temperature, conditions.timec, (PARTIAL sum(conditions.temperature)), (PARTIAL sum(conditions.humidity)) + Relations: Aggregate on (public.conditions) + Data node: data_node_1 + Chunks: _dist_hyper_1_1_chunk, _dist_hyper_1_2_chunk, _dist_hyper_1_3_chunk, _dist_hyper_1_4_chunk + Remote SQL: SELECT region, temperature, timec, _timescaledb_internal.partialize_agg(sum(temperature)), _timescaledb_internal.partialize_agg(sum(humidity)) FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 2, 3 ORDER BY region ASC NULLS LAST, temperature ASC NULLS LAST, timec ASC NULLS LAST + -> Custom Scan (DataNodeScan) + Output: conditions_1.region, conditions_1.temperature, conditions_1.timec, (PARTIAL sum(conditions_1.temperature)), (PARTIAL sum(conditions_1.humidity)) + Relations: Aggregate on (public.conditions) + Data node: data_node_2 + Chunks: _dist_hyper_1_9_chunk, _dist_hyper_1_10_chunk, _dist_hyper_1_11_chunk, _dist_hyper_1_12_chunk + Remote SQL: SELECT region, temperature, timec, _timescaledb_internal.partialize_agg(sum(temperature)), _timescaledb_internal.partialize_agg(sum(humidity)) FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 2, 3 ORDER BY region ASC NULLS LAST, temperature ASC NULLS LAST, timec ASC NULLS LAST + -> Custom Scan (DataNodeScan) + Output: conditions_2.region, conditions_2.temperature, conditions_2.timec, (PARTIAL sum(conditions_2.temperature)), (PARTIAL sum(conditions_2.humidity)) + Relations: Aggregate on (public.conditions) + Data node: data_node_3 + Chunks: _dist_hyper_1_5_chunk, _dist_hyper_1_6_chunk, _dist_hyper_1_7_chunk, _dist_hyper_1_8_chunk + Remote SQL: SELECT region, temperature, timec, _timescaledb_internal.partialize_agg(sum(temperature)), _timescaledb_internal.partialize_agg(sum(humidity)) FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 2, 3 ORDER BY region ASC NULLS LAST, temperature ASC NULLS LAST, timec ASC NULLS LAST +(25 rows) + +-- Aggregates with no aggregate reference in targetlist #3664 +:PREFIX SELECT :GROUPING + FROM :TEST_TABLE + GROUP BY :GROUPING, timec + HAVING avg(temperature) > 20 + ORDER BY :GROUPING, timec; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Finalize GroupAggregate + Output: region, temperature, timec + Group Key: region, temperature, timec + Filter: (avg(temperature) > '20'::double precision) + -> Custom Scan (AsyncAppend) + Output: region, temperature, timec, (PARTIAL avg(temperature)) + -> Merge Append + Sort Key: conditions.region, conditions.temperature, conditions.timec + -> Custom Scan (DataNodeScan) + Output: conditions.region, conditions.temperature, conditions.timec, (PARTIAL avg(conditions.temperature)) + Relations: Aggregate on (public.conditions) + Data node: data_node_1 + Chunks: _dist_hyper_1_1_chunk, _dist_hyper_1_2_chunk, _dist_hyper_1_3_chunk, _dist_hyper_1_4_chunk + Remote SQL: SELECT region, temperature, timec, _timescaledb_internal.partialize_agg(avg(temperature)) FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 2, 3 ORDER BY region ASC NULLS LAST, temperature ASC NULLS LAST, timec ASC NULLS LAST + -> Custom Scan (DataNodeScan) + Output: conditions_1.region, conditions_1.temperature, conditions_1.timec, (PARTIAL avg(conditions_1.temperature)) + Relations: Aggregate on (public.conditions) + Data node: data_node_2 + Chunks: _dist_hyper_1_9_chunk, _dist_hyper_1_10_chunk, _dist_hyper_1_11_chunk, _dist_hyper_1_12_chunk + Remote SQL: SELECT region, temperature, timec, _timescaledb_internal.partialize_agg(avg(temperature)) FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 2, 3 ORDER BY region ASC NULLS LAST, temperature ASC NULLS LAST, timec ASC NULLS LAST + -> Custom Scan (DataNodeScan) + Output: conditions_2.region, conditions_2.temperature, conditions_2.timec, (PARTIAL avg(conditions_2.temperature)) + Relations: Aggregate on (public.conditions) + Data node: data_node_3 + Chunks: _dist_hyper_1_5_chunk, _dist_hyper_1_6_chunk, _dist_hyper_1_7_chunk, _dist_hyper_1_8_chunk + Remote SQL: SELECT region, temperature, timec, _timescaledb_internal.partialize_agg(avg(temperature)) FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 2, 3 ORDER BY region ASC NULLS LAST, temperature ASC NULLS LAST, timec ASC NULLS LAST +(26 rows) + -- Full aggregate pushdown correctness check, compare location grouped query results with partionwise aggregates on and off \set GROUPING 'location' SELECT format('%s/results/dist_agg_loc_results_test.out', :'TEST_OUTPUT_DIR') as "RESULTS_TEST1", diff --git a/tsl/test/expected/dist_partial_agg-13.out b/tsl/test/expected/dist_partial_agg-13.out index 4b273cc043e..3ad739b036b 100644 --- a/tsl/test/expected/dist_partial_agg-13.out +++ b/tsl/test/expected/dist_partial_agg-13.out @@ -276,6 +276,72 @@ SET enable_partitionwise_aggregate = ON; Remote SQL: SELECT timec, location, temperature, humidity, allnull, highlow, bit_int, good_life FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) ORDER BY location ASC NULLS LAST, timec ASC NULLS LAST (26 rows) +-- Aggregates nested in expressions and no top-level aggregate #3672 +:PREFIX SELECT :GROUPING, + sum(temperature)+sum(humidity) as agg_sum_expr + FROM :TEST_TABLE + GROUP BY :GROUPING, timec + ORDER BY :GROUPING, timec; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Custom Scan (AsyncAppend) + Output: location, ((sum(temperature) + sum(humidity))), timec + -> Merge Append + Sort Key: conditions.location, conditions.timec + -> Custom Scan (DataNodeScan) + Output: conditions.location, ((sum(conditions.temperature) + sum(conditions.humidity))), conditions.timec + Relations: Aggregate on (public.conditions) + Data node: data_node_1 + Chunks: _dist_hyper_1_1_chunk, _dist_hyper_1_2_chunk, _dist_hyper_1_3_chunk, _dist_hyper_1_4_chunk + Remote SQL: SELECT location, (sum(temperature) + sum(humidity)), timec FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 3 ORDER BY location ASC NULLS LAST, timec ASC NULLS LAST + -> Custom Scan (DataNodeScan) + Output: conditions_1.location, ((sum(conditions_1.temperature) + sum(conditions_1.humidity))), conditions_1.timec + Relations: Aggregate on (public.conditions) + Data node: data_node_2 + Chunks: _dist_hyper_1_9_chunk, _dist_hyper_1_10_chunk, _dist_hyper_1_11_chunk, _dist_hyper_1_12_chunk + Remote SQL: SELECT location, (sum(temperature) + sum(humidity)), timec FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 3 ORDER BY location ASC NULLS LAST, timec ASC NULLS LAST + -> Custom Scan (DataNodeScan) + Output: conditions_2.location, ((sum(conditions_2.temperature) + sum(conditions_2.humidity))), conditions_2.timec + Relations: Aggregate on (public.conditions) + Data node: data_node_3 + Chunks: _dist_hyper_1_5_chunk, _dist_hyper_1_6_chunk, _dist_hyper_1_7_chunk, _dist_hyper_1_8_chunk + Remote SQL: SELECT location, (sum(temperature) + sum(humidity)), timec FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 3 ORDER BY location ASC NULLS LAST, timec ASC NULLS LAST +(22 rows) + +-- Aggregates with no aggregate reference in targetlist #3664 +:PREFIX SELECT :GROUPING + FROM :TEST_TABLE + GROUP BY :GROUPING, timec + HAVING avg(temperature) > 20 + ORDER BY :GROUPING, timec; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + Sort + Output: location, timec + Sort Key: location, timec + -> Custom Scan (AsyncAppend) + Output: location, timec + -> Append + -> Custom Scan (DataNodeScan) + Output: conditions.location, conditions.timec + Relations: Aggregate on (public.conditions) + Data node: data_node_1 + Chunks: _dist_hyper_1_1_chunk, _dist_hyper_1_2_chunk, _dist_hyper_1_3_chunk, _dist_hyper_1_4_chunk + Remote SQL: SELECT location, timec FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 2 HAVING ((avg(temperature) > 20::double precision)) + -> Custom Scan (DataNodeScan) + Output: conditions_1.location, conditions_1.timec + Relations: Aggregate on (public.conditions) + Data node: data_node_2 + Chunks: _dist_hyper_1_9_chunk, _dist_hyper_1_10_chunk, _dist_hyper_1_11_chunk, _dist_hyper_1_12_chunk + Remote SQL: SELECT location, timec FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 2 HAVING ((avg(temperature) > 20::double precision)) + -> Custom Scan (DataNodeScan) + Output: conditions_2.location, conditions_2.timec + Relations: Aggregate on (public.conditions) + Data node: data_node_3 + Chunks: _dist_hyper_1_5_chunk, _dist_hyper_1_6_chunk, _dist_hyper_1_7_chunk, _dist_hyper_1_8_chunk + Remote SQL: SELECT location, timec FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 2 HAVING ((avg(temperature) > 20::double precision)) +(24 rows) + \set GROUPING 'region, temperature' \ir 'include/aggregate_queries.sql' -- This file and its contents are licensed under the Timescale License. @@ -438,6 +504,77 @@ SET enable_partitionwise_aggregate = ON; Remote SQL: SELECT timec, region, temperature, humidity, allnull, highlow, bit_int, good_life FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) ORDER BY region ASC NULLS LAST, temperature ASC NULLS LAST, timec ASC NULLS LAST (29 rows) +-- Aggregates nested in expressions and no top-level aggregate #3672 +:PREFIX SELECT :GROUPING, + sum(temperature)+sum(humidity) as agg_sum_expr + FROM :TEST_TABLE + GROUP BY :GROUPING, timec + ORDER BY :GROUPING, timec; + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Finalize GroupAggregate + Output: region, temperature, (sum(temperature) + sum(humidity)), timec + Group Key: region, temperature, timec + -> Custom Scan (AsyncAppend) + Output: region, temperature, timec, (PARTIAL sum(temperature)), (PARTIAL sum(humidity)) + -> Merge Append + Sort Key: conditions.region, conditions.temperature, conditions.timec + -> Custom Scan (DataNodeScan) + Output: conditions.region, conditions.temperature, conditions.timec, (PARTIAL sum(conditions.temperature)), (PARTIAL sum(conditions.humidity)) + Relations: Aggregate on (public.conditions) + Data node: data_node_1 + Chunks: _dist_hyper_1_1_chunk, _dist_hyper_1_2_chunk, _dist_hyper_1_3_chunk, _dist_hyper_1_4_chunk + Remote SQL: SELECT region, temperature, timec, _timescaledb_internal.partialize_agg(sum(temperature)), _timescaledb_internal.partialize_agg(sum(humidity)) FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 2, 3 ORDER BY region ASC NULLS LAST, temperature ASC NULLS LAST, timec ASC NULLS LAST + -> Custom Scan (DataNodeScan) + Output: conditions_1.region, conditions_1.temperature, conditions_1.timec, (PARTIAL sum(conditions_1.temperature)), (PARTIAL sum(conditions_1.humidity)) + Relations: Aggregate on (public.conditions) + Data node: data_node_2 + Chunks: _dist_hyper_1_9_chunk, _dist_hyper_1_10_chunk, _dist_hyper_1_11_chunk, _dist_hyper_1_12_chunk + Remote SQL: SELECT region, temperature, timec, _timescaledb_internal.partialize_agg(sum(temperature)), _timescaledb_internal.partialize_agg(sum(humidity)) FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 2, 3 ORDER BY region ASC NULLS LAST, temperature ASC NULLS LAST, timec ASC NULLS LAST + -> Custom Scan (DataNodeScan) + Output: conditions_2.region, conditions_2.temperature, conditions_2.timec, (PARTIAL sum(conditions_2.temperature)), (PARTIAL sum(conditions_2.humidity)) + Relations: Aggregate on (public.conditions) + Data node: data_node_3 + Chunks: _dist_hyper_1_5_chunk, _dist_hyper_1_6_chunk, _dist_hyper_1_7_chunk, _dist_hyper_1_8_chunk + Remote SQL: SELECT region, temperature, timec, _timescaledb_internal.partialize_agg(sum(temperature)), _timescaledb_internal.partialize_agg(sum(humidity)) FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 2, 3 ORDER BY region ASC NULLS LAST, temperature ASC NULLS LAST, timec ASC NULLS LAST +(25 rows) + +-- Aggregates with no aggregate reference in targetlist #3664 +:PREFIX SELECT :GROUPING + FROM :TEST_TABLE + GROUP BY :GROUPING, timec + HAVING avg(temperature) > 20 + ORDER BY :GROUPING, timec; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Finalize GroupAggregate + Output: region, temperature, timec + Group Key: region, temperature, timec + Filter: (avg(temperature) > '20'::double precision) + -> Custom Scan (AsyncAppend) + Output: region, temperature, timec, (PARTIAL avg(temperature)) + -> Merge Append + Sort Key: conditions.region, conditions.temperature, conditions.timec + -> Custom Scan (DataNodeScan) + Output: conditions.region, conditions.temperature, conditions.timec, (PARTIAL avg(conditions.temperature)) + Relations: Aggregate on (public.conditions) + Data node: data_node_1 + Chunks: _dist_hyper_1_1_chunk, _dist_hyper_1_2_chunk, _dist_hyper_1_3_chunk, _dist_hyper_1_4_chunk + Remote SQL: SELECT region, temperature, timec, _timescaledb_internal.partialize_agg(avg(temperature)) FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 2, 3 ORDER BY region ASC NULLS LAST, temperature ASC NULLS LAST, timec ASC NULLS LAST + -> Custom Scan (DataNodeScan) + Output: conditions_1.region, conditions_1.temperature, conditions_1.timec, (PARTIAL avg(conditions_1.temperature)) + Relations: Aggregate on (public.conditions) + Data node: data_node_2 + Chunks: _dist_hyper_1_9_chunk, _dist_hyper_1_10_chunk, _dist_hyper_1_11_chunk, _dist_hyper_1_12_chunk + Remote SQL: SELECT region, temperature, timec, _timescaledb_internal.partialize_agg(avg(temperature)) FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 2, 3 ORDER BY region ASC NULLS LAST, temperature ASC NULLS LAST, timec ASC NULLS LAST + -> Custom Scan (DataNodeScan) + Output: conditions_2.region, conditions_2.temperature, conditions_2.timec, (PARTIAL avg(conditions_2.temperature)) + Relations: Aggregate on (public.conditions) + Data node: data_node_3 + Chunks: _dist_hyper_1_5_chunk, _dist_hyper_1_6_chunk, _dist_hyper_1_7_chunk, _dist_hyper_1_8_chunk + Remote SQL: SELECT region, temperature, timec, _timescaledb_internal.partialize_agg(avg(temperature)) FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 2, 3 ORDER BY region ASC NULLS LAST, temperature ASC NULLS LAST, timec ASC NULLS LAST +(26 rows) + -- Full aggregate pushdown correctness check, compare location grouped query results with partionwise aggregates on and off \set GROUPING 'location' SELECT format('%s/results/dist_agg_loc_results_test.out', :'TEST_OUTPUT_DIR') as "RESULTS_TEST1", diff --git a/tsl/test/expected/dist_partial_agg-14.out b/tsl/test/expected/dist_partial_agg-14.out index 4b273cc043e..3ad739b036b 100644 --- a/tsl/test/expected/dist_partial_agg-14.out +++ b/tsl/test/expected/dist_partial_agg-14.out @@ -276,6 +276,72 @@ SET enable_partitionwise_aggregate = ON; Remote SQL: SELECT timec, location, temperature, humidity, allnull, highlow, bit_int, good_life FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) ORDER BY location ASC NULLS LAST, timec ASC NULLS LAST (26 rows) +-- Aggregates nested in expressions and no top-level aggregate #3672 +:PREFIX SELECT :GROUPING, + sum(temperature)+sum(humidity) as agg_sum_expr + FROM :TEST_TABLE + GROUP BY :GROUPING, timec + ORDER BY :GROUPING, timec; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Custom Scan (AsyncAppend) + Output: location, ((sum(temperature) + sum(humidity))), timec + -> Merge Append + Sort Key: conditions.location, conditions.timec + -> Custom Scan (DataNodeScan) + Output: conditions.location, ((sum(conditions.temperature) + sum(conditions.humidity))), conditions.timec + Relations: Aggregate on (public.conditions) + Data node: data_node_1 + Chunks: _dist_hyper_1_1_chunk, _dist_hyper_1_2_chunk, _dist_hyper_1_3_chunk, _dist_hyper_1_4_chunk + Remote SQL: SELECT location, (sum(temperature) + sum(humidity)), timec FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 3 ORDER BY location ASC NULLS LAST, timec ASC NULLS LAST + -> Custom Scan (DataNodeScan) + Output: conditions_1.location, ((sum(conditions_1.temperature) + sum(conditions_1.humidity))), conditions_1.timec + Relations: Aggregate on (public.conditions) + Data node: data_node_2 + Chunks: _dist_hyper_1_9_chunk, _dist_hyper_1_10_chunk, _dist_hyper_1_11_chunk, _dist_hyper_1_12_chunk + Remote SQL: SELECT location, (sum(temperature) + sum(humidity)), timec FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 3 ORDER BY location ASC NULLS LAST, timec ASC NULLS LAST + -> Custom Scan (DataNodeScan) + Output: conditions_2.location, ((sum(conditions_2.temperature) + sum(conditions_2.humidity))), conditions_2.timec + Relations: Aggregate on (public.conditions) + Data node: data_node_3 + Chunks: _dist_hyper_1_5_chunk, _dist_hyper_1_6_chunk, _dist_hyper_1_7_chunk, _dist_hyper_1_8_chunk + Remote SQL: SELECT location, (sum(temperature) + sum(humidity)), timec FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 3 ORDER BY location ASC NULLS LAST, timec ASC NULLS LAST +(22 rows) + +-- Aggregates with no aggregate reference in targetlist #3664 +:PREFIX SELECT :GROUPING + FROM :TEST_TABLE + GROUP BY :GROUPING, timec + HAVING avg(temperature) > 20 + ORDER BY :GROUPING, timec; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + Sort + Output: location, timec + Sort Key: location, timec + -> Custom Scan (AsyncAppend) + Output: location, timec + -> Append + -> Custom Scan (DataNodeScan) + Output: conditions.location, conditions.timec + Relations: Aggregate on (public.conditions) + Data node: data_node_1 + Chunks: _dist_hyper_1_1_chunk, _dist_hyper_1_2_chunk, _dist_hyper_1_3_chunk, _dist_hyper_1_4_chunk + Remote SQL: SELECT location, timec FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 2 HAVING ((avg(temperature) > 20::double precision)) + -> Custom Scan (DataNodeScan) + Output: conditions_1.location, conditions_1.timec + Relations: Aggregate on (public.conditions) + Data node: data_node_2 + Chunks: _dist_hyper_1_9_chunk, _dist_hyper_1_10_chunk, _dist_hyper_1_11_chunk, _dist_hyper_1_12_chunk + Remote SQL: SELECT location, timec FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 2 HAVING ((avg(temperature) > 20::double precision)) + -> Custom Scan (DataNodeScan) + Output: conditions_2.location, conditions_2.timec + Relations: Aggregate on (public.conditions) + Data node: data_node_3 + Chunks: _dist_hyper_1_5_chunk, _dist_hyper_1_6_chunk, _dist_hyper_1_7_chunk, _dist_hyper_1_8_chunk + Remote SQL: SELECT location, timec FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 2 HAVING ((avg(temperature) > 20::double precision)) +(24 rows) + \set GROUPING 'region, temperature' \ir 'include/aggregate_queries.sql' -- This file and its contents are licensed under the Timescale License. @@ -438,6 +504,77 @@ SET enable_partitionwise_aggregate = ON; Remote SQL: SELECT timec, region, temperature, humidity, allnull, highlow, bit_int, good_life FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) ORDER BY region ASC NULLS LAST, temperature ASC NULLS LAST, timec ASC NULLS LAST (29 rows) +-- Aggregates nested in expressions and no top-level aggregate #3672 +:PREFIX SELECT :GROUPING, + sum(temperature)+sum(humidity) as agg_sum_expr + FROM :TEST_TABLE + GROUP BY :GROUPING, timec + ORDER BY :GROUPING, timec; + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Finalize GroupAggregate + Output: region, temperature, (sum(temperature) + sum(humidity)), timec + Group Key: region, temperature, timec + -> Custom Scan (AsyncAppend) + Output: region, temperature, timec, (PARTIAL sum(temperature)), (PARTIAL sum(humidity)) + -> Merge Append + Sort Key: conditions.region, conditions.temperature, conditions.timec + -> Custom Scan (DataNodeScan) + Output: conditions.region, conditions.temperature, conditions.timec, (PARTIAL sum(conditions.temperature)), (PARTIAL sum(conditions.humidity)) + Relations: Aggregate on (public.conditions) + Data node: data_node_1 + Chunks: _dist_hyper_1_1_chunk, _dist_hyper_1_2_chunk, _dist_hyper_1_3_chunk, _dist_hyper_1_4_chunk + Remote SQL: SELECT region, temperature, timec, _timescaledb_internal.partialize_agg(sum(temperature)), _timescaledb_internal.partialize_agg(sum(humidity)) FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 2, 3 ORDER BY region ASC NULLS LAST, temperature ASC NULLS LAST, timec ASC NULLS LAST + -> Custom Scan (DataNodeScan) + Output: conditions_1.region, conditions_1.temperature, conditions_1.timec, (PARTIAL sum(conditions_1.temperature)), (PARTIAL sum(conditions_1.humidity)) + Relations: Aggregate on (public.conditions) + Data node: data_node_2 + Chunks: _dist_hyper_1_9_chunk, _dist_hyper_1_10_chunk, _dist_hyper_1_11_chunk, _dist_hyper_1_12_chunk + Remote SQL: SELECT region, temperature, timec, _timescaledb_internal.partialize_agg(sum(temperature)), _timescaledb_internal.partialize_agg(sum(humidity)) FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 2, 3 ORDER BY region ASC NULLS LAST, temperature ASC NULLS LAST, timec ASC NULLS LAST + -> Custom Scan (DataNodeScan) + Output: conditions_2.region, conditions_2.temperature, conditions_2.timec, (PARTIAL sum(conditions_2.temperature)), (PARTIAL sum(conditions_2.humidity)) + Relations: Aggregate on (public.conditions) + Data node: data_node_3 + Chunks: _dist_hyper_1_5_chunk, _dist_hyper_1_6_chunk, _dist_hyper_1_7_chunk, _dist_hyper_1_8_chunk + Remote SQL: SELECT region, temperature, timec, _timescaledb_internal.partialize_agg(sum(temperature)), _timescaledb_internal.partialize_agg(sum(humidity)) FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 2, 3 ORDER BY region ASC NULLS LAST, temperature ASC NULLS LAST, timec ASC NULLS LAST +(25 rows) + +-- Aggregates with no aggregate reference in targetlist #3664 +:PREFIX SELECT :GROUPING + FROM :TEST_TABLE + GROUP BY :GROUPING, timec + HAVING avg(temperature) > 20 + ORDER BY :GROUPING, timec; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Finalize GroupAggregate + Output: region, temperature, timec + Group Key: region, temperature, timec + Filter: (avg(temperature) > '20'::double precision) + -> Custom Scan (AsyncAppend) + Output: region, temperature, timec, (PARTIAL avg(temperature)) + -> Merge Append + Sort Key: conditions.region, conditions.temperature, conditions.timec + -> Custom Scan (DataNodeScan) + Output: conditions.region, conditions.temperature, conditions.timec, (PARTIAL avg(conditions.temperature)) + Relations: Aggregate on (public.conditions) + Data node: data_node_1 + Chunks: _dist_hyper_1_1_chunk, _dist_hyper_1_2_chunk, _dist_hyper_1_3_chunk, _dist_hyper_1_4_chunk + Remote SQL: SELECT region, temperature, timec, _timescaledb_internal.partialize_agg(avg(temperature)) FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 2, 3 ORDER BY region ASC NULLS LAST, temperature ASC NULLS LAST, timec ASC NULLS LAST + -> Custom Scan (DataNodeScan) + Output: conditions_1.region, conditions_1.temperature, conditions_1.timec, (PARTIAL avg(conditions_1.temperature)) + Relations: Aggregate on (public.conditions) + Data node: data_node_2 + Chunks: _dist_hyper_1_9_chunk, _dist_hyper_1_10_chunk, _dist_hyper_1_11_chunk, _dist_hyper_1_12_chunk + Remote SQL: SELECT region, temperature, timec, _timescaledb_internal.partialize_agg(avg(temperature)) FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 2, 3 ORDER BY region ASC NULLS LAST, temperature ASC NULLS LAST, timec ASC NULLS LAST + -> Custom Scan (DataNodeScan) + Output: conditions_2.region, conditions_2.temperature, conditions_2.timec, (PARTIAL avg(conditions_2.temperature)) + Relations: Aggregate on (public.conditions) + Data node: data_node_3 + Chunks: _dist_hyper_1_5_chunk, _dist_hyper_1_6_chunk, _dist_hyper_1_7_chunk, _dist_hyper_1_8_chunk + Remote SQL: SELECT region, temperature, timec, _timescaledb_internal.partialize_agg(avg(temperature)) FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) GROUP BY 1, 2, 3 ORDER BY region ASC NULLS LAST, temperature ASC NULLS LAST, timec ASC NULLS LAST +(26 rows) + -- Full aggregate pushdown correctness check, compare location grouped query results with partionwise aggregates on and off \set GROUPING 'location' SELECT format('%s/results/dist_agg_loc_results_test.out', :'TEST_OUTPUT_DIR') as "RESULTS_TEST1", diff --git a/tsl/test/sql/include/aggregate_queries.sql b/tsl/test/sql/include/aggregate_queries.sql index e8a30cb5fb0..10867d522ba 100644 --- a/tsl/test/sql/include/aggregate_queries.sql +++ b/tsl/test/sql/include/aggregate_queries.sql @@ -67,3 +67,18 @@ FROM :TEST_TABLE GROUP BY :GROUPING, timec ORDER BY :GROUPING, timec; + +-- Aggregates nested in expressions and no top-level aggregate #3672 +:PREFIX SELECT :GROUPING, + sum(temperature)+sum(humidity) as agg_sum_expr + FROM :TEST_TABLE + GROUP BY :GROUPING, timec + ORDER BY :GROUPING, timec; + +-- Aggregates with no aggregate reference in targetlist #3664 +:PREFIX SELECT :GROUPING + FROM :TEST_TABLE + GROUP BY :GROUPING, timec + HAVING avg(temperature) > 20 + ORDER BY :GROUPING, timec; +