Skip to content

Commit

Permalink
opt: fixup CTE stats on placeholder queries
Browse files Browse the repository at this point in the history
During optbuilder phase we copy the initial expressions stats into the
fake-rel but this value can change when placeholders are assigned so add
a normalization step to do this so it happens on the assignplaceholder
path.

Fixes: cockroachdb#99389
Epic: none
Release note: none
  • Loading branch information
cucaroach committed Mar 27, 2023
1 parent ea389ff commit b092bd6
Show file tree
Hide file tree
Showing 3 changed files with 118 additions and 0 deletions.
11 changes: 11 additions & 0 deletions pkg/sql/opt/norm/rules/with.opt
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,14 @@
)
$private
)

# ReapplyBindingRowCount propagates stats changed after assign-placeholders.
[ReapplyBindingRowCount, Normalize]
(RecursiveCTE
$binding:*
$initial:* & (RowCountDifferent $binding $initial)
$recursive:*
$private:*
)
=>
(ReapplyBindingRowCount $binding $initial $recursive $private)
78 changes: 78 additions & 0 deletions pkg/sql/opt/norm/testdata/rules/with
Original file line number Diff line number Diff line change
Expand Up @@ -3005,3 +3005,81 @@ project
│ └── i:3 + 1 [as="?column?":4, outer=(3), immutable]
└── projections
└── i:2 [as=i:5, outer=(2)]

exec-ddl
CREATE TABLE yz (y INT, z INT, INDEX (y) STORING (z));
----

exec-ddl
ALTER TABLE yz INJECT STATISTICS '[
{
"columns": ["y"],
"created_at": "2018-01-01 1:00:00.00000+00:00",
"row_count": 100000,
"distinct_count": 100000
},
{
"columns": ["z"],
"created_at": "2018-01-01 1:00:00.00000+00:00",
"row_count": 100000,
"distinct_count": 1
}
]';
----

# Regression test for #99389.
assign-placeholders-norm query-args=(1) format=show-stats
WITH RECURSIVE cte(a,b) AS (
(SELECT * FROM yz WHERE y = $1)
UNION ALL
(SELECT y+1, z FROM yz LEFT JOIN cte ON b = z)
)
SELECT * FROM cte;
----
project
├── columns: a:16 b:17
├── immutable
├── stats: [rows=10]
├── recursive-c-t-e
│ ├── columns: a:6 b:7
│ ├── working table binding: &1
│ ├── initial columns: y:1 z:2
│ ├── recursive columns: "?column?":15 z:9
│ ├── immutable
│ ├── stats: [rows=10]
│ ├── fake-rel
│ │ ├── columns: a:6 b:7
│ │ ├── cardinality: [1 - ]
│ │ └── stats: [rows=1.00001]
│ ├── select
│ │ ├── columns: y:1!null z:2
│ │ ├── stats: [rows=1.00001, distinct(1)=1, null(1)=0]
│ │ ├── fd: ()-->(1)
│ │ ├── scan yz
│ │ │ ├── columns: y:1 z:2
│ │ │ └── stats: [rows=100000, distinct(1)=100000, null(1)=0]
│ │ └── filters
│ │ └── y:1 = 1 [outer=(1), constraints=(/1: [/1 - /1]; tight), fd=()-->(1)]
│ └── project
│ ├── columns: "?column?":15 z:9
│ ├── immutable
│ ├── stats: [rows=1000000]
│ ├── left-join (hash)
│ │ ├── columns: y:8 z:9 b:14
│ │ ├── stats: [rows=1000000, distinct(14)=1, null(14)=0]
│ │ ├── scan yz
│ │ │ ├── columns: y:8 z:9
│ │ │ └── stats: [rows=100000, distinct(9)=1, null(9)=0]
│ │ ├── with-scan &1 (cte)
│ │ │ ├── columns: b:14
│ │ │ ├── mapping:
│ │ │ │ └── b:7 => b:14
│ │ │ ├── cardinality: [1 - ]
│ │ │ └── stats: [rows=33333.33, distinct(14)=3333.33, null(14)=333.333]
│ │ └── filters
│ │ └── b:14 = z:9 [outer=(9,14), constraints=(/9: (/NULL - ]; /14: (/NULL - ]), fd=(9)==(14), (14)==(9)]
│ └── projections
│ └── y:8 + 1 [as="?column?":15, outer=(8), immutable]
└── projections
├── a:6 [as=a:16, outer=(6)]
└── b:7 [as=b:17, outer=(7)]
29 changes: 29 additions & 0 deletions pkg/sql/opt/norm/with_funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -297,3 +297,32 @@ func (c *CustomFuncs) CanAddRecursiveLimit(
func (c *CustomFuncs) GetRecursiveWithID(private *memo.RecursiveCTEPrivate) opt.WithID {
return private.WithID
}

func (c *CustomFuncs) ReapplyBindingRowCount(
binding, initial, recursive memo.RelExpr, private *memo.RecursiveCTEPrivate,
) memo.RelExpr {
// The properties of the binding are tricky: the recursive expression is
// invoked repeatedly and these must hold each time. We can't use the initial
// expression's properties directly, as those only hold the first time the
// recursive query is executed. We don't really know the input row count,
// except for the first time we run the recursive query. We don't have
// anything better though.
card := initial.Relational().Cardinality.Union(recursive.Relational().Cardinality)
newRowCount := initial.Relational().Statistics().RowCount
newBinding := c.f.ConstructFakeRel(&memo.FakeRelPrivate{
Props: MakeBindingPropsForRecursiveCTE(card, binding.Relational().OutputCols, newRowCount),
})
var replace ReplaceFunc
replace = func(e opt.Expr) opt.Expr {
if e == binding {
return newBinding
}
return c.f.Replace(e, replace)
}
newRecursive := replace(recursive).(memo.RelExpr)
return c.f.ConstructRecursiveCTE(newBinding, initial, newRecursive, private)
}

func (c *CustomFuncs) RowCountDifferent(binding, initial memo.RelExpr) bool {
return initial.Relational().Statistics().RowCount != binding.Relational().Statistics().RowCount
}

0 comments on commit b092bd6

Please sign in to comment.