From b092bd60e6dc3be40f6892b3bbef4c6574759390 Mon Sep 17 00:00:00 2001 From: Tommy Reilly Date: Thu, 23 Mar 2023 17:00:10 -0400 Subject: [PATCH] opt: fixup CTE stats on placeholder queries During optbuilder phase we copy the initial expressions stats into the fake-rel but this value can change when placeholders are assigned so add a normalization step to do this so it happens on the assignplaceholder path. Fixes: #99389 Epic: none Release note: none --- pkg/sql/opt/norm/rules/with.opt | 11 ++++ pkg/sql/opt/norm/testdata/rules/with | 78 ++++++++++++++++++++++++++++ pkg/sql/opt/norm/with_funcs.go | 29 +++++++++++ 3 files changed, 118 insertions(+) diff --git a/pkg/sql/opt/norm/rules/with.opt b/pkg/sql/opt/norm/rules/with.opt index f1e055ef1622..c1d2c8b237a1 100644 --- a/pkg/sql/opt/norm/rules/with.opt +++ b/pkg/sql/opt/norm/rules/with.opt @@ -64,3 +64,14 @@ ) $private ) + +# ReapplyBindingRowCount propagates stats changed after assign-placeholders. +[ReapplyBindingRowCount, Normalize] +(RecursiveCTE + $binding:* + $initial:* & (RowCountDifferent $binding $initial) + $recursive:* + $private:* +) +=> +(ReapplyBindingRowCount $binding $initial $recursive $private) diff --git a/pkg/sql/opt/norm/testdata/rules/with b/pkg/sql/opt/norm/testdata/rules/with index 3f024cb2a237..2c4b2edb5cb5 100644 --- a/pkg/sql/opt/norm/testdata/rules/with +++ b/pkg/sql/opt/norm/testdata/rules/with @@ -3005,3 +3005,81 @@ project │ └── i:3 + 1 [as="?column?":4, outer=(3), immutable] └── projections └── i:2 [as=i:5, outer=(2)] + +exec-ddl +CREATE TABLE yz (y INT, z INT, INDEX (y) STORING (z)); +---- + +exec-ddl +ALTER TABLE yz INJECT STATISTICS '[ + { + "columns": ["y"], + "created_at": "2018-01-01 1:00:00.00000+00:00", + "row_count": 100000, + "distinct_count": 100000 + }, + { + "columns": ["z"], + "created_at": "2018-01-01 1:00:00.00000+00:00", + "row_count": 100000, + "distinct_count": 1 + } +]'; +---- + +# Regression test for #99389. +assign-placeholders-norm query-args=(1) format=show-stats +WITH RECURSIVE cte(a,b) AS ( + (SELECT * FROM yz WHERE y = $1) + UNION ALL + (SELECT y+1, z FROM yz LEFT JOIN cte ON b = z) +) +SELECT * FROM cte; +---- +project + ├── columns: a:16 b:17 + ├── immutable + ├── stats: [rows=10] + ├── recursive-c-t-e + │ ├── columns: a:6 b:7 + │ ├── working table binding: &1 + │ ├── initial columns: y:1 z:2 + │ ├── recursive columns: "?column?":15 z:9 + │ ├── immutable + │ ├── stats: [rows=10] + │ ├── fake-rel + │ │ ├── columns: a:6 b:7 + │ │ ├── cardinality: [1 - ] + │ │ └── stats: [rows=1.00001] + │ ├── select + │ │ ├── columns: y:1!null z:2 + │ │ ├── stats: [rows=1.00001, distinct(1)=1, null(1)=0] + │ │ ├── fd: ()-->(1) + │ │ ├── scan yz + │ │ │ ├── columns: y:1 z:2 + │ │ │ └── stats: [rows=100000, distinct(1)=100000, null(1)=0] + │ │ └── filters + │ │ └── y:1 = 1 [outer=(1), constraints=(/1: [/1 - /1]; tight), fd=()-->(1)] + │ └── project + │ ├── columns: "?column?":15 z:9 + │ ├── immutable + │ ├── stats: [rows=1000000] + │ ├── left-join (hash) + │ │ ├── columns: y:8 z:9 b:14 + │ │ ├── stats: [rows=1000000, distinct(14)=1, null(14)=0] + │ │ ├── scan yz + │ │ │ ├── columns: y:8 z:9 + │ │ │ └── stats: [rows=100000, distinct(9)=1, null(9)=0] + │ │ ├── with-scan &1 (cte) + │ │ │ ├── columns: b:14 + │ │ │ ├── mapping: + │ │ │ │ └── b:7 => b:14 + │ │ │ ├── cardinality: [1 - ] + │ │ │ └── stats: [rows=33333.33, distinct(14)=3333.33, null(14)=333.333] + │ │ └── filters + │ │ └── b:14 = z:9 [outer=(9,14), constraints=(/9: (/NULL - ]; /14: (/NULL - ]), fd=(9)==(14), (14)==(9)] + │ └── projections + │ └── y:8 + 1 [as="?column?":15, outer=(8), immutable] + └── projections + ├── a:6 [as=a:16, outer=(6)] + └── b:7 [as=b:17, outer=(7)] diff --git a/pkg/sql/opt/norm/with_funcs.go b/pkg/sql/opt/norm/with_funcs.go index f04152efc550..af79ad693e58 100644 --- a/pkg/sql/opt/norm/with_funcs.go +++ b/pkg/sql/opt/norm/with_funcs.go @@ -297,3 +297,32 @@ func (c *CustomFuncs) CanAddRecursiveLimit( func (c *CustomFuncs) GetRecursiveWithID(private *memo.RecursiveCTEPrivate) opt.WithID { return private.WithID } + +func (c *CustomFuncs) ReapplyBindingRowCount( + binding, initial, recursive memo.RelExpr, private *memo.RecursiveCTEPrivate, +) memo.RelExpr { + // The properties of the binding are tricky: the recursive expression is + // invoked repeatedly and these must hold each time. We can't use the initial + // expression's properties directly, as those only hold the first time the + // recursive query is executed. We don't really know the input row count, + // except for the first time we run the recursive query. We don't have + // anything better though. + card := initial.Relational().Cardinality.Union(recursive.Relational().Cardinality) + newRowCount := initial.Relational().Statistics().RowCount + newBinding := c.f.ConstructFakeRel(&memo.FakeRelPrivate{ + Props: MakeBindingPropsForRecursiveCTE(card, binding.Relational().OutputCols, newRowCount), + }) + var replace ReplaceFunc + replace = func(e opt.Expr) opt.Expr { + if e == binding { + return newBinding + } + return c.f.Replace(e, replace) + } + newRecursive := replace(recursive).(memo.RelExpr) + return c.f.ConstructRecursiveCTE(newBinding, initial, newRecursive, private) +} + +func (c *CustomFuncs) RowCountDifferent(binding, initial memo.RelExpr) bool { + return initial.Relational().Statistics().RowCount != binding.Relational().Statistics().RowCount +}