From fa941da76ce96e384b37da1bceb1c65f18273bce Mon Sep 17 00:00:00 2001 From: Radu Berinde Date: Fri, 13 Dec 2019 13:32:46 -0500 Subject: [PATCH] opt: inline Values inside Any In an expression like `a IN (VALUES (a11, a12), (b11, b12), ...)`, the Values clause is a subquery, but since it has fixed results we can just inline the values directly: `a IN ((a11, a12), (b11, b12), ...)`. This leads to better plans, e.g we can constrain scans to these values. The two forms are equivalent, even if the expressions have side-effects. Release note (performance improvement): better execution plans when a `VALUES` clause is used as the right-hand side of `IN` or `ANY`. --- .../logictest/testdata/logic_test/apply_join | 40 ------- pkg/sql/opt/norm/custom_funcs.go | 61 +++++++++++ pkg/sql/opt/norm/rules/scalar.opt | 35 ++++++ pkg/sql/opt/norm/testdata/rules/scalar | 103 ++++++++++++++++++ 4 files changed, 199 insertions(+), 40 deletions(-) diff --git a/pkg/sql/logictest/testdata/logic_test/apply_join b/pkg/sql/logictest/testdata/logic_test/apply_join index 3264b2001ae5..00cfb1cd4100 100644 --- a/pkg/sql/logictest/testdata/logic_test/apply_join +++ b/pkg/sql/logictest/testdata/logic_test/apply_join @@ -279,46 +279,6 @@ SELECT a, (SELECT a FROM y) FROM x ---- 1 1 -# Regression test for #40590: non-executable apply join inside apply join. - -statement ok -CREATE TABLE IF NOT EXISTS tab_orig AS - SELECT - '2001-01-01'::TIMESTAMP + g * '1 day' AS _timestamp, - g AS _string - FROM - generate_series(NULL, NULL) AS g; - -statement error could not decorrelate subquery -SELECT - NULL -FROM - tab_orig AS tab_9962, tab_orig AS tab_9963 JOIN tab_orig AS tab_9964 ON true -WHERE - NOT - ( - tab_9964._timestamp IN (SELECT tab_9962._timestamp) - OR EXISTS( - WITH - with_2063 AS (SELECT NULL) - SELECT - * - FROM - ( - SELECT - COALESCE( - tab_9962._string, - tab_9963._string, - (SELECT * FROM with_2063), - (SELECT * FROM with_2063) - ) - AS foo - ) - WHERE - foo IS NOT NULL - ) - ) - # Regression test for #40589. statement ok CREATE TABLE IF NOT EXISTS t40589 AS diff --git a/pkg/sql/opt/norm/custom_funcs.go b/pkg/sql/opt/norm/custom_funcs.go index 0b94dc19ac31..6825dd8ea70f 100644 --- a/pkg/sql/opt/norm/custom_funcs.go +++ b/pkg/sql/opt/norm/custom_funcs.go @@ -1838,6 +1838,11 @@ func (c *CustomFuncs) SubqueryRequestedCol(sub *memo.SubqueryPrivate) opt.Column return sub.RequestedCol } +// SubqueryCmp returns the comparison operation from a SubqueryPrivate. +func (c *CustomFuncs) SubqueryCmp(sub *memo.SubqueryPrivate) opt.Operator { + return sub.Cmp +} + // MakeArrayAggCol returns a ColPrivate with the given type and an "array_agg" label. func (c *CustomFuncs) MakeArrayAggCol(typ *types.T) *memo.ColPrivate { return &memo.ColPrivate{Col: c.mem.Metadata().AddColumn("array_agg", typ)} @@ -1866,6 +1871,62 @@ func (c *CustomFuncs) MakeLimited(sub *memo.SubqueryPrivate) *memo.SubqueryPriva return &newSub } +// ValuesCols returns the column list from a ValuesExpr. +func (c *CustomFuncs) ValuesCols(values memo.RelExpr) opt.ColList { + return values.(*memo.ValuesExpr).Cols +} + +// IsTupleOfVars returns true if the given tuple contains Variables +// corresponding to the given columns (in the same order). +func (c *CustomFuncs) IsTupleOfVars(tuple opt.ScalarExpr, cols opt.ColList) bool { + t := tuple.(*memo.TupleExpr) + if len(t.Elems) != len(cols) { + return false + } + for i := range t.Elems { + v, ok := t.Elems[i].(*memo.VariableExpr) + if !ok || v.Col != cols[i] { + return false + } + } + return true +} + +// InlineValues converts a Values operator to a tuple. If there are +// multiple columns, the result is a tuple of tuples. +func (c *CustomFuncs) InlineValues(v memo.RelExpr) *memo.TupleExpr { + values := v.(*memo.ValuesExpr) + md := c.mem.Metadata() + if len(values.Cols) > 1 { + colTypes := make([]types.T, len(values.Cols)) + for i, colID := range values.Cols { + colTypes[i] = *md.ColumnMeta(colID).Type + } + // Inlining a multi-column VALUES results in a tuple of tuples. Example: + // + // (a,b) IN (VALUES (1,1), (2,2)) + // => + // (a,b) IN ((1,1), (2,2)) + return &memo.TupleExpr{ + Elems: values.Rows, + Typ: types.MakeTuple([]types.T{*types.MakeTuple(colTypes)}), + } + } + // Inlining a sngle-column VALUES results in a simple tuple. Example: + // a IN (VALUES (1), (2)) + // => + // a IN (1, 2) + colType := md.ColumnMeta(values.Cols[0]).Type + tuple := &memo.TupleExpr{ + Elems: make(memo.ScalarListExpr, len(values.Rows)), + Typ: types.MakeTuple([]types.T{*colType}), + } + for i := range values.Rows { + tuple.Elems[i] = values.Rows[i].(*memo.TupleExpr).Elems[0] + } + return tuple +} + // ---------------------------------------------------------------------- // // Numeric Rules diff --git a/pkg/sql/opt/norm/rules/scalar.opt b/pkg/sql/opt/norm/rules/scalar.opt index aa7fdd5412e7..c12c275a133b 100644 --- a/pkg/sql/opt/norm/rules/scalar.opt +++ b/pkg/sql/opt/norm/rules/scalar.opt @@ -211,6 +211,41 @@ => (SimplifyWhens $condition $whens $orElse) +# InlineAnyValuesSingleCol converts Any with Values input to AnyScalar. +# This version handles the case where there is a single column. +[InlineAnyValuesSingleCol, Normalize] +(Any + $values:(Values) + $scalar:* + $private:* +) +=> +(AnyScalar + $scalar + (InlineValues $values) + (SubqueryCmp $private) +) + +# InlineAnyValuesMultiCol converts Any with Values input to AnyScalar. +# This version handles the case where there are multiple columns; in this case, +# the Values is wrapped into a Project that converts each row to a tuple. +[InlineAnyValuesMultiCol, Normalize] +(Any + (Project + $values:(Values) + [ (ProjectionsItem $tuple:(Tuple)) ] & (IsTupleOfVars $tuple (ValuesCols $values)) + $passthrough:* & (ColsAreEmpty $passthrough) + ) + $scalar:* + $private:* +) +=> +(AnyScalar + $scalar + (InlineValues $values) + (SubqueryCmp $private) +) + # SimplifyEqualsAnyTuple converts a scalar ANY operation to an IN comparison. # It transforms # diff --git a/pkg/sql/opt/norm/testdata/rules/scalar b/pkg/sql/opt/norm/testdata/rules/scalar index 512e8a4d2294..359532e1652d 100644 --- a/pkg/sql/opt/norm/testdata/rules/scalar +++ b/pkg/sql/opt/norm/testdata/rules/scalar @@ -893,6 +893,109 @@ project ├── key: (1) └── fd: (1)-->(5) +# -------------------------------------------------- +# InlineAnyValuesSingleCol +# -------------------------------------------------- + +opt expect=InlineAnyValuesSingleCol +SELECT k FROM a WHERE k IN (VALUES (1), (2), (3)) +---- +scan a + ├── columns: k:1(int!null) + ├── constraint: /1: [/1 - /3] + ├── cardinality: [0 - 3] + └── key: (1) + +opt expect=InlineAnyValuesSingleCol +SELECT k FROM a WHERE k IN (VALUES ((SELECT k*i FROM a)), (2), (3)) +---- +select + ├── columns: k:1(int!null) + ├── key: (1) + ├── scan a + │ ├── columns: k:1(int!null) + │ └── key: (1) + └── filters + └── in [type=bool, outer=(1), subquery] + ├── variable: k [type=int] + └── tuple [type=tuple{int}] + ├── subquery [type=int] + │ └── max1-row + │ ├── columns: "?column?":13(int) + │ ├── cardinality: [0 - 1] + │ ├── key: () + │ ├── fd: ()-->(13) + │ └── project + │ ├── columns: "?column?":13(int) + │ ├── scan a + │ │ ├── columns: k:7(int!null) i:8(int) + │ │ ├── key: (7) + │ │ └── fd: (7)-->(8) + │ └── projections + │ └── k * i [type=int, outer=(7,8)] + ├── const: 2 [type=int] + └── const: 3 [type=int] + +# -------------------------------------------------- +# InlineAnyValuesMultiCol +# -------------------------------------------------- + +opt expect=InlineAnyValuesMultiCol +SELECT k FROM a WHERE (k, i) IN (VALUES (1, 1), (2, 2), (3, 3)) +---- +project + ├── columns: k:1(int!null) + ├── cardinality: [0 - 3] + ├── key: (1) + └── select + ├── columns: k:1(int!null) i:2(int!null) + ├── cardinality: [0 - 3] + ├── key: (1) + ├── fd: (1)-->(2) + ├── scan a + │ ├── columns: k:1(int!null) i:2(int) + │ ├── constraint: /1: [/1 - /3] + │ ├── cardinality: [0 - 3] + │ ├── key: (1) + │ └── fd: (1)-->(2) + └── filters + └── (k, i) IN ((1, 1), (2, 2), (3, 3)) [type=bool, outer=(1,2), constraints=(/1/2: [/1/1 - /1/1] [/2/2 - /2/2] [/3/3 - /3/3]; /2: [/1 - /1] [/2 - /2] [/3 - /3]; tight)] + +# The rule should not fire if the columns are not in the right order. +opt expect-not=InlineAnyValuesMultiCol +SELECT k FROM a WHERE (k, i) IN (SELECT b, a FROM (VALUES (1, 1), (2, 2), (3, 3)) AS v(a,b)) +---- +project + ├── columns: k:1(int!null) + ├── key: (1) + └── semi-join (hash) + ├── columns: k:1(int!null) column10:10(tuple{int, int}) + ├── key: (1) + ├── fd: (1)-->(10) + ├── project + │ ├── columns: column10:10(tuple{int, int}) k:1(int!null) + │ ├── key: (1) + │ ├── fd: (1)-->(10) + │ ├── scan a + │ │ ├── columns: k:1(int!null) i:2(int) + │ │ ├── key: (1) + │ │ └── fd: (1)-->(2) + │ └── projections + │ └── (k, i) [type=tuple{int, int}, outer=(1,2)] + ├── project + │ ├── columns: column9:9(tuple{int, int}) + │ ├── cardinality: [3 - 3] + │ ├── values + │ │ ├── columns: column1:7(int!null) column2:8(int!null) + │ │ ├── cardinality: [3 - 3] + │ │ ├── (1, 1) [type=tuple{int, int}] + │ │ ├── (2, 2) [type=tuple{int, int}] + │ │ └── (3, 3) [type=tuple{int, int}] + │ └── projections + │ └── (column2, column1) [type=tuple{int, int}, outer=(7,8)] + └── filters + └── column10 = column9 [type=bool, outer=(9,10), constraints=(/9: (/NULL - ]; /10: (/NULL - ]), fd=(9)==(10), (10)==(9)] + # -------------------------------------------------- # SimplifyEqualsAnyTuple # --------------------------------------------------