From df6d188a77322045333928fdf7fea772757d4b32 Mon Sep 17 00:00:00 2001 From: Rebecca Taft Date: Thu, 26 Sep 2019 09:29:42 -0400 Subject: [PATCH] opt: add equality push-down rule and map equalities in AssociateJoin rule This commit adds a new normalization rule to enable pushing variable equality conditions such as a.x=b.x through joins. For example, consider this query: SELECT * FROM a, b, c WHERE a.x=b.x AND a.x=c.x Given join ordering (a join (b join c)), it should be possible to infer the filter b.x=c.x and push it down from the top level onto the join (b join c). This commit enables that mapping and pushdown to happen. In addition, this commit updates the AssociateJoin rule to map as many equality conditions as possible to use the output columns of the new inner-most join, allowing those conditions to be pushed onto that join. For example, consider this query: SELECT * FROM a, b, c WHERE a.x=b.x AND b.x=c.x If the AssociateJoin rule creates a new join ordering (b join (a join c)), it should be possible to map a.x=b.x to a.x=c.x and add it onto the new inner-most join (a join c). This commit enables that mapping to happen. Release note (performance improvement): Improved performance for some join queries due to improved filter inference during planning. Release justification: This commit will not be merged before the release branch is cut. 
--- .../testdata/distsql_interleaved_join | 2 +- pkg/sql/opt/exec/execbuilder/testdata/join | 51 ++- .../opt/exec/execbuilder/testdata/update_from | 63 ++- pkg/sql/opt/norm/join.go | 177 ++++++++- pkg/sql/opt/norm/rules/join.opt | 36 ++ pkg/sql/opt/norm/testdata/rules/join | 243 ++++++++++- pkg/sql/opt/optbuilder/testdata/update_from | 90 ++--- pkg/sql/opt/xform/rules/join.opt | 16 +- pkg/sql/opt/xform/testdata/rules/join | 376 +++++++++++++++--- 9 files changed, 866 insertions(+), 188 deletions(-) diff --git a/pkg/sql/opt/exec/execbuilder/testdata/distsql_interleaved_join b/pkg/sql/opt/exec/execbuilder/testdata/distsql_interleaved_join index d0327b387acd..2b8e6e565b76 100644 --- a/pkg/sql/opt/exec/execbuilder/testdata/distsql_interleaved_join +++ b/pkg/sql/opt/exec/execbuilder/testdata/distsql_interleaved_join @@ -464,4 +464,4 @@ SELECT url FROM [EXPLAIN (DISTSQL) JOIN grandchild1 USING (pid1, cid1) ] ---- -https://cockroachdb.github.io/distsqlplan/decode.html#eJzUll2P4jYUhu_7K9zTm5nWyLHzsRCpUqp22rKisIVZqdIqF1nihUjZhDqh6mjEf69CmBI-xmeCRSTuCPZj-9jPkd5nKP5OwYfZw-jh50eyVin5dTr5g3x6-OvD6KfhmNz9Mpw9zv4c3ZPdlO_rCatIyazk5P1kOCbzZZLGnHycDce_kbtVEvP7emChoiw-HaVkXs0JgUKWx3IcfZUF-J-AAwUBFGyg4AAFF0IKK5XPZVHkqpryvAWG8b_gWxSSbLUuq79DCvNcSfCfoUzKVIIPw6yUKpXRP3Iqo1iq93mSScUsoBDLMkrS7Y4j-aWEao_ka6Segl1VQGGaLJbNkboIoFCvAxRS-aW8C_gP9z-qau72J1CYrEufBJwGggY2DRwauBBuKOTrcnfY_Rk_P5FlVCwPj1SxDoSbkEJRRgsJPt_Qt9f9GH1OdyUz93Dll1oarwIUZqsoK3zSY4J9x1yPceYIJti344-jEXP7jDNBoiwmNsnLpVRF-2rEQTXi1Wr2S62zXMVSyfhgsbAisSlnruT3qFjunp8fvf_uEWng7J-RBuLgIZ3tW7o06NNgcFT9vizboKwzZx7nvXzFuHt8AWf3dg725hc1Br_5xkDqbjaG16IxNH3RY0415lZjzhWbhHfcJLybJhEXiSpuXlSk7qao79qI2pCxx7z_rfWY17TWu6KoomNRRTei2heJat-8qEjdTVH7bUR93c0e619RT7tjPe1u9HQu0tO5eT2Rupt6Dtro2e8xbr0o6lrVR8NRbl3RUKdjQ53u4_CZE01lscqzQr4p7FpVTTJeyPqOinyt5vKDyufbberPyZbbBqdYFmU9KuqPYVYPVQd8O-yZwAMTmBudm7t6mre4MtEO9kzggQnMjc59dGUntDimrSZt6-_b1sL88M6sY9oxEVwPI4LrYURwPYwJjtCI4K6J4HoYEVwPI4LrYUxwhEYE90wEf2eiqB5GFNXDiKJ6GFMUoRFF-yaK6mFEUT2MKKqHMUURGlF0YKIoN8oJCI1I
itCIpQiNaYrhWFYwCwtmacEsLhjmBbPAwI0SAz-JDK1s1dOYrXoas1VPo7YiOGZrm7B0-mZt0lJbGrO1VV5qjWO2noQHra3h5pv_AgAA___wPy5O +https://cockroachdb.github.io/distsqlplan/decode.html#eJzUlU9r20AQxe_9FMuckmaKvZL8T1BQad2i4NqpnUIh6CCkiaNW2VVXq9Ji_N2LpDSRXXcVbHDxcTXz9H7zZmFXkH9PwYXFeDJ-e80KlbL389lHdjP-cjV540_Z2Tt_cb34NDlnDy0v64YsVCQ0Z5czf8qiuySNOfu88Kcf2FmWxPy8LixVKOK_q8iisicABCFjmob3lIN7AxwQLECwAcEBhB4ECJmSEeW5VGXLqhL48U9wuwiJyApdfg4QIqkI3BXoRKcELvhCk0op_EFzCmNSlzIRpDpdQIhJh0laOU7oVkPpkdyH6pdXowLCPFneNQuNQQCh_hcgpHSrzzx-gZ51cf5alaLHIyDMCu0yj6NnoWej56DXQ6-P3gCCNYIs9BN-rsMlgcvX-PwRS4x6uE5vc6w_2A9bAoSJlN-KjH2ViWBSlFSPfMOKb1QhGuGsf8I9MRVCqpgUxRtAwXoH_lS-klmHd7c6d3vbG958r93zU9p9y4iN3fePv3trr_ytU8q_ZcRG_oPj52_vlb99Svm3jNjIf3j8_J298ndOKf-WERv5j_7v27MDbk55JkVOz3pZuuXbRPGS6ocsl4WK6ErJqLKpj7NKV32IKdd1ldcHX9SlErAp5kaxtSHm22LL7NxibRvVjlnsHMLdM4r7Zuf-Ic4Do3hodh4e4jwy76rbck3Ml2zbO1i_-B0AAP__qaW5dA== diff --git a/pkg/sql/opt/exec/execbuilder/testdata/join b/pkg/sql/opt/exec/execbuilder/testdata/join index df7b3b5a0f3a..d581f4dd577f 100644 --- a/pkg/sql/opt/exec/execbuilder/testdata/join +++ b/pkg/sql/opt/exec/execbuilder/testdata/join @@ -93,32 +93,31 @@ EXPLAIN SELECT * FROM JOIN twocolumn AS c (d, e) ON a.b = c.d AND c.d = onecolumn.x LIMIT 1 ---- -· distributed false -· vectorized false -render · · - └── limit · · - │ count 1 - └── hash-join · · - │ type inner - │ equality (x) = (x) - ├── hash-join · · - │ │ type inner - │ │ equality (x) = (x) - │ ├── scan · · - │ │ table twocolumn@primary - │ │ spans ALL - │ └── scan · · - │ table onecolumn@primary - │ spans ALL - └── hash-join · · - │ type inner - │ equality (x) = (x) - ├── scan · · - │ table onecolumn@primary - │ spans ALL - └── scan · · -· table twocolumn@primary -· spans ALL +· distributed false +· vectorized false +limit · · + │ count 1 + └── hash-join · · + │ type inner + │ equality (x) = (x) + ├── hash-join · · + │ │ type inner + │ │ equality (x) = (x) + │ ├── scan · · + │ │ table onecolumn@primary + │ │ spans ALL + │ └── scan · · + │ 
table twocolumn@primary + │ spans ALL + └── hash-join · · + │ type inner + │ equality (x) = (x) + ├── scan · · + │ table onecolumn@primary + │ spans ALL + └── scan · · +· table twocolumn@primary +· spans ALL # The following queries verify that only the necessary columns are scanned. query TTTTT diff --git a/pkg/sql/opt/exec/execbuilder/testdata/update_from b/pkg/sql/opt/exec/execbuilder/testdata/update_from index e8675857d869..1224d3c93be4 100644 --- a/pkg/sql/opt/exec/execbuilder/testdata/update_from +++ b/pkg/sql/opt/exec/execbuilder/testdata/update_from @@ -155,39 +155,32 @@ CREATE TABLE ac (a INT, c INT) query TTT EXPLAIN UPDATE abc SET b = ab.b, c = ac.c FROM ab, ac WHERE abc.a = ab.a AND abc.a = ac.a ---- -· distributed false -· vectorized false -count · · - └── update · · - │ table abc - │ set b, c - │ strategy updater - └── render · · - └── distinct · · - │ distinct on a - │ order key a - └── merge-join · · - │ type inner - │ equality (a) = (a) - │ mergeJoinOrder +"(a=a)" - ├── merge-join · · - │ │ type inner - │ │ equality (a) = (a) - │ │ left cols are key · - │ │ mergeJoinOrder +"(a=a)" - │ ├── scan · · - │ │ table abc@primary - │ │ spans ALL - │ └── sort · · - │ │ order +a - │ └── scan · · - │ table ac@primary - │ spans ALL - └── sort · · - │ order +a - └── scan · · -· table ab@primary -· spans ALL +· distributed false +· vectorized false +count · · + └── update · · + │ table abc + │ set b, c + │ strategy updater + └── render · · + └── distinct · · + │ distinct on a + └── hash-join · · + │ type inner + │ equality (a) = (a) + ├── scan · · + │ table ab@primary + │ spans ALL + └── hash-join · · + │ type inner + │ equality (a) = (a) + │ right cols are key · + ├── scan · · + │ table ac@primary + │ spans ALL + └── scan · · +· table abc@primary +· spans ALL # Make sure UPDATE ... FROM works with LATERAL. 
query TTT @@ -216,14 +209,14 @@ run · · │ type inner │ equality (a) = (a) ├── scan · · - │ table ac@primary + │ table ab@primary │ spans ALL └── hash-join · · │ type inner │ equality (a) = (a) │ right cols are key · ├── scan · · - │ table ab@primary + │ table ac@primary │ spans ALL └── scan · · · table abc@primary diff --git a/pkg/sql/opt/norm/join.go b/pkg/sql/opt/norm/join.go index 2d85ac899887..4aa9a04471a8 100644 --- a/pkg/sql/opt/norm/join.go +++ b/pkg/sql/opt/norm/join.go @@ -98,6 +98,171 @@ func (c *CustomFuncs) SimplifyNotNullEquality( panic(errors.AssertionFailedf("invalid ops: %v, %v", testOp, constOp)) } +// CanMapJoinOpEqualities checks whether it is possible to map equality +// conditions in a join to use different variables so that the number of +// conditions crossing both sides of a join are minimized. +// See canMapJoinOpEquivalenceGroup for details. +func (c *CustomFuncs) CanMapJoinOpEqualities( + filters memo.FiltersExpr, leftCols, rightCols opt.ColSet, +) bool { + var equivFD props.FuncDepSet + for i := range filters { + equivFD.AddEquivFrom(&filters[i].ScalarProps(c.mem).FuncDeps) + } + equivReps := equivFD.EquivReps() + + for col, ok := equivReps.Next(0); ok; col, ok = equivReps.Next(col + 1) { + if c.canMapJoinOpEquivalenceGroup(filters, col, leftCols, rightCols) { + return true + } + } + + return false +} + +// canMapJoinOpEquivalenceGroup checks whether it is possible to map equality +// conditions in a join that form an equivalence group to use different +// variables so that the number of conditions crossing both sides of a join +// are minimized. +// +// Specifically, it finds the set of columns containing col that forms an +// equivalence group in filters. It splits that group into columns from +// the left and right sides of the join, and checks whether there are multiple +// equality conditions in filters that connect the two groups. If so, +// canMapJoinOpEquivalenceGroup returns true. 
+func (c *CustomFuncs) canMapJoinOpEquivalenceGroup( + filters memo.FiltersExpr, col opt.ColumnID, leftCols, rightCols opt.ColSet, +) bool { + eqCols := c.GetEquivColsWithEquivType(col, filters) + + // To map equality conditions, the equivalent columns must intersect + // both sides and must be fully bound by both sides. + if !(eqCols.Intersects(leftCols) && + eqCols.Intersects(rightCols) && + eqCols.SubsetOf(leftCols.Union(rightCols))) { + return false + } + + // If more than one equality condition connecting columns in the equivalence + // group spans both sides of the join, these conditions can be remapped. + found := 0 + for i := range filters { + fd := &filters[i].ScalarProps(c.mem).FuncDeps + filterEqCols := fd.ComputeEquivClosure(fd.EquivReps()) + if filterEqCols.Intersects(leftCols) && filterEqCols.Intersects(rightCols) && + filterEqCols.SubsetOf(eqCols) { + found++ + if found > 1 { + return true + } + } + } + + return false +} + +// MapJoinOpEqualities maps all variable equality conditions in filters to +// use columns in either leftCols or rightCols where possible. See +// canMapJoinOpEquivalenceGroup and mapJoinOpEquivalenceGroup for more info. +func (c *CustomFuncs) MapJoinOpEqualities( + filters memo.FiltersExpr, leftCols, rightCols opt.ColSet, +) memo.FiltersExpr { + var equivFD props.FuncDepSet + for i := range filters { + equivFD.AddEquivFrom(&filters[i].ScalarProps(c.mem).FuncDeps) + } + equivReps := equivFD.EquivReps() + + newFilters := filters + equivReps.ForEach(func(col opt.ColumnID) { + if c.canMapJoinOpEquivalenceGroup(newFilters, col, leftCols, rightCols) { + newFilters = c.mapJoinOpEquivalenceGroup(newFilters, col, leftCols, rightCols) + } + }) + + return newFilters +} + +// mapJoinOpEquivalenceGroup maps equality conditions in a join that form an +// equivalence group to use different variables so that the number of +// conditions crossing both sides of a join are minimized. 
This is useful for +// creating additional filter conditions that can be pushed down to either side +// of the join. +// +// To perform the mapping, mapJoinOpEquivalenceGroup finds the set of columns +// containing col that forms an equivalence group in filters. The result is +// a set of columns that are all equivalent, some on the left side of the join +// and some on the right side. mapJoinOpEquivalenceGroup constructs a new set of +// equalities that implies the same equivalency group, with the property that +// there is a single condition with one left column and one right column. +// For example, consider this query: +// +// SELECT * FROM a, b WHERE a.x = b.x AND a.x = a.y AND a.y = b.y +// +// It has an equivalence group {a.x, a.y, b.x, b.y}. The columns a.x and a.y +// are on the left side, and b.x and b.y are on the right side. Initially there +// are two conditions that cross both sides. After mapping, the query would be +// converted to: +// +// SELECT * FROM a, b WHERE a.x = a.y AND b.x = b.y AND a.x = b.x +// +func (c *CustomFuncs) mapJoinOpEquivalenceGroup( + filters memo.FiltersExpr, col opt.ColumnID, leftCols, rightCols opt.ColSet, +) memo.FiltersExpr { + eqCols := c.GetEquivColsWithEquivType(col, filters) + + // First remove all the equality conditions for this equivalence group. + newFilters := make(memo.FiltersExpr, 0, len(filters)) + for i := range filters { + fd := &filters[i].ScalarProps(c.mem).FuncDeps + filterEqCols := fd.ComputeEquivClosure(fd.EquivReps()) + if !filterEqCols.Empty() && filterEqCols.SubsetOf(eqCols) { + continue + } + newFilters = append(newFilters, filters[i]) + } + + // Now append new equality conditions that imply the same equivalency group, + // but only one condition should contain columns from both sides. 
+ leftEqCols := leftCols.Intersection(eqCols) + rightEqCols := rightCols.Intersection(eqCols) + firstLeftCol, ok := leftEqCols.Next(0) + if !ok { + panic(errors.AssertionFailedf( + "mapJoinOpEquivalenceGroup called with equivalence group that does not intersect both sides", + )) + } + firstRightCol, ok := rightEqCols.Next(0) + if !ok { + panic(errors.AssertionFailedf( + "mapJoinOpEquivalenceGroup called with equivalence group that does not intersect both sides", + )) + } + + // Connect all the columns on the left. + for col, ok := leftEqCols.Next(firstLeftCol + 1); ok; col, ok = leftEqCols.Next(col + 1) { + newFilters = append(newFilters, memo.FiltersItem{ + Condition: c.f.ConstructEq(c.f.ConstructVariable(firstLeftCol), c.f.ConstructVariable(col)), + }) + } + + // Connect all the columns on the right. + for col, ok := rightEqCols.Next(firstRightCol + 1); ok; col, ok = rightEqCols.Next(col + 1) { + newFilters = append(newFilters, memo.FiltersItem{ + Condition: c.f.ConstructEq(c.f.ConstructVariable(firstRightCol), c.f.ConstructVariable(col)), + }) + } + + // Connect the two sides. + newFilters = append(newFilters, memo.FiltersItem{ + Condition: c.f.ConstructEq( + c.f.ConstructVariable(firstLeftCol), c.f.ConstructVariable(firstRightCol), + ), + }) + + return newFilters +} + // CanMapJoinOpFilter returns true if it is possible to map a boolean expression // src, which is a conjunct in the given filters expression, to use the output // columns of the relational expression dst. @@ -174,9 +339,8 @@ func (c *CustomFuncs) MapJoinOpFilter( return src.Condition } - // MapJoinOpFilter each column in src to one column in dst. We choose an - // arbitrary column (the one with the smallest ColumnID) if there are multiple - // choices. + // Map each column in src to one column in dst. We choose an arbitrary column + // (the one with the smallest ColumnID) if there are multiple choices. 
var colMap util.FastIntMap outerCols := src.ScalarProps(c.mem).OuterCols for srcCol, ok := outerCols.Next(0); ok; srcCol, ok = outerCols.Next(srcCol + 1) { @@ -188,7 +352,7 @@ func (c *CustomFuncs) MapJoinOpFilter( dstCol, ok := eqCols.Next(0) if !ok { panic(errors.AssertionFailedf( - "Map called on src that cannot be mapped to dst. src:\n%s\ndst:\n%s", + "MapJoinOpFilter called on src that cannot be mapped to dst. src:\n%s\ndst:\n%s", src, dst, )) } @@ -261,10 +425,11 @@ func (c *CustomFuncs) GetEquivColsWithEquivType( } // Compute all equivalent columns. - eqCols := opt.MakeColSet(col) + var equivFD props.FuncDepSet for i := range filters { - eqCols = filters[i].ScalarProps(c.mem).FuncDeps.ComputeEquivClosure(eqCols) + equivFD.AddEquivFrom(&filters[i].ScalarProps(c.mem).FuncDeps) } + eqCols := equivFD.ComputeEquivGroup(col) eqCols.ForEach(func(i opt.ColumnID) { // Only include columns that have the same type as col. diff --git a/pkg/sql/opt/norm/rules/join.opt b/pkg/sql/opt/norm/rules/join.opt index 5f4652b0b0bd..d64bc5b1f02c 100644 --- a/pkg/sql/opt/norm/rules/join.opt +++ b/pkg/sql/opt/norm/rules/join.opt @@ -148,6 +148,42 @@ $private ) +# MapEqualityIntoJoinLeftAndRight checks whether it is possible to map +# equality conditions in a join to use different variables so that the +# number of conditions crossing both sides of a join is minimized. If so, +# the MapJoinOpEqualities function performs this mapping to construct new +# filters. +# +# For example, consider this query: +# +# SELECT * FROM a, b WHERE a.x = b.x AND b.x = a.y; +# +# As written, both equality conditions contain variables from both sides of +# the join. We can rewrite this query, however, so that only one condition +# spans both sides: +# +# SELECT * FROM a, b WHERE a.x = a.y AND b.x = a.y; +# +# Now the condition a.x = a.y is fully bound by the left side of the join, +# and is available to be pushed down by PushFilterIntoJoinLeft. 
+# +# See the MapJoinOpEqualities function for more details. +[MapEqualityIntoJoinLeftAndRight, Normalize] +(InnerJoin | InnerJoinApply | LeftJoin | LeftJoinApply | RightJoin | + SemiJoin | SemiJoinApply | AntiJoin | AntiJoinApply + $left:* & ^(HasOuterCols $left) + $right:* & ^(HasOuterCols $right) + $on:* & (CanMapJoinOpEqualities $on $leftCols:(OutputCols $left) $rightCols:(OutputCols $right)) + $private:* +) +=> +((OpName) + $left + $right + (MapJoinOpEqualities $on $leftCols $rightCols) + $private +) + # PushFilterIntoJoinLeft pushes Join filter conditions into the left side of the # join. This is possible in the case of InnerJoin and RightJoin, as long as the # condition has no dependencies on the right side of the join. Left and Full diff --git a/pkg/sql/opt/norm/testdata/rules/join b/pkg/sql/opt/norm/testdata/rules/join index abcaff997678..81b3224c4bfd 100644 --- a/pkg/sql/opt/norm/testdata/rules/join +++ b/pkg/sql/opt/norm/testdata/rules/join @@ -383,29 +383,28 @@ inner-join (merge) └── filters (true) # Multiple equivalent columns. -# TODO(rytaft): We should also infer the equality predicates a.k=a.i and b.x=b.y. 
opt expect=MapFilterIntoJoinLeft SELECT * FROM a INNER JOIN b ON a.k=b.x AND a.i=b.x AND a.i=b.y AND a.f + b.y::FLOAT > 5 AND a.s || b.x::STRING = 'foo1' ---- -inner-join (lookup b) +inner-join (lookup a) ├── columns: k:1(int!null) i:2(int!null) f:3(float!null) s:4(string) j:5(jsonb) x:6(int!null) y:7(int!null) - ├── key columns: [1] = [6] + ├── key columns: [6] = [1] ├── key: (6) ├── fd: (1)-->(3-5), (1)==(2,6,7), (2)==(1,6,7), (6)==(1,2,7), (7)==(1,2,6) ├── select - │ ├── columns: k:1(int!null) i:2(int) f:3(float!null) s:4(string) j:5(jsonb) - │ ├── key: (1) - │ ├── fd: (1)-->(2-5) - │ ├── scan a - │ │ ├── columns: k:1(int!null) i:2(int) f:3(float!null) s:4(string) j:5(jsonb) - │ │ ├── key: (1) - │ │ └── fd: (1)-->(2-5) + │ ├── columns: x:6(int!null) y:7(int!null) + │ ├── key: (6) + │ ├── fd: (6)==(7), (7)==(6) + │ ├── scan b + │ │ ├── columns: x:6(int!null) y:7(int) + │ │ ├── key: (6) + │ │ └── fd: (6)-->(7) │ └── filters - │ ├── (f + i::FLOAT8) > 5.0 [type=bool, outer=(2,3)] - │ └── (s || k::STRING) = 'foo1' [type=bool, outer=(1,4)] + │ └── x = y [type=bool, outer=(6,7), constraints=(/6: (/NULL - ]; /7: (/NULL - ]), fd=(6)==(7), (7)==(6)] └── filters - ├── i = x [type=bool, outer=(2,6), constraints=(/2: (/NULL - ]; /6: (/NULL - ]), fd=(2)==(6), (6)==(2)] - └── i = y [type=bool, outer=(2,7), constraints=(/2: (/NULL - ]; /7: (/NULL - ]), fd=(2)==(7), (7)==(2)] + ├── (f + k::FLOAT8) > 5.0 [type=bool, outer=(1,3)] + ├── (s || k::STRING) = 'foo1' [type=bool, outer=(1,4)] + └── k = i [type=bool, outer=(1,2), constraints=(/1: (/NULL - ]; /2: (/NULL - ]), fd=(1)==(2), (2)==(1)] # Can push to both sides with semi-join. 
opt expect=(MapFilterIntoJoinLeft,MapFilterIntoJoinRight) @@ -1115,6 +1114,221 @@ inner-join (merge) │ └── const: 5 [type=int] └── variable: k [type=int] +# --------------------------------- +# MapEqualityIntoJoinLeftAndRight +# --------------------------------- + +opt expect=MapEqualityIntoJoinLeftAndRight +SELECT * FROM (SELECT a.k AS a_k, b.x AS b_x FROM a, b) JOIN (SELECT c.x AS c_x, d.x AS d_x FROM c, d) +ON a_k = c_x AND c_x = b_x AND b_x = d_x +---- +inner-join (merge) + ├── columns: a_k:1(int!null) b_x:6(int!null) c_x:8(int!null) d_x:11(int!null) + ├── left ordering: +1 + ├── right ordering: +8 + ├── key: (11) + ├── fd: (1)==(6,8,11), (6)==(1,8,11), (8)==(1,6,11), (11)==(1,6,8) + ├── inner-join (merge) + │ ├── columns: k:1(int!null) b.x:6(int!null) + │ ├── left ordering: +1 + │ ├── right ordering: +6 + │ ├── key: (6) + │ ├── fd: (1)==(6), (6)==(1) + │ ├── ordering: +(1|6) [actual: +1] + │ ├── scan a + │ │ ├── columns: k:1(int!null) + │ │ ├── key: (1) + │ │ └── ordering: +1 + │ ├── scan b + │ │ ├── columns: b.x:6(int!null) + │ │ ├── key: (6) + │ │ └── ordering: +6 + │ └── filters (true) + ├── inner-join (merge) + │ ├── columns: c.x:8(int!null) d.x:11(int!null) + │ ├── left ordering: +8 + │ ├── right ordering: +11 + │ ├── key: (11) + │ ├── fd: (8)==(11), (11)==(8) + │ ├── ordering: +(8|11) [actual: +8] + │ ├── scan c@secondary + │ │ ├── columns: c.x:8(int!null) + │ │ ├── key: (8) + │ │ └── ordering: +8 + │ ├── scan d + │ │ ├── columns: d.x:11(int!null) + │ │ ├── key: (11) + │ │ └── ordering: +11 + │ └── filters (true) + └── filters (true) + +opt expect=MapEqualityIntoJoinLeftAndRight +SELECT * FROM (SELECT b.x AS b_x, c.x AS c_x FROM b, c), d WHERE b_x=d.x AND c_x=d.x +---- +inner-join (merge) + ├── columns: b_x:1(int!null) c_x:3(int!null) x:6(int!null) y:7(int!null) z:8(int!null) + ├── left ordering: +1 + ├── right ordering: +6 + ├── key: (6) + ├── fd: (1)==(3,6), (3)==(1,6), (6)-->(7,8), (6)==(1,3) + ├── inner-join (merge) + │ ├── columns: b.x:1(int!null) 
c.x:3(int!null) + │ ├── left ordering: +1 + │ ├── right ordering: +3 + │ ├── key: (3) + │ ├── fd: (1)==(3), (3)==(1) + │ ├── ordering: +(1|3) [actual: +1] + │ ├── scan b + │ │ ├── columns: b.x:1(int!null) + │ │ ├── key: (1) + │ │ └── ordering: +1 + │ ├── scan c@secondary + │ │ ├── columns: c.x:3(int!null) + │ │ ├── key: (3) + │ │ └── ordering: +3 + │ └── filters (true) + ├── scan d + │ ├── columns: d.x:6(int!null) d.y:7(int!null) d.z:8(int!null) + │ ├── key: (6) + │ ├── fd: (6)-->(7,8) + │ └── ordering: +6 + └── filters (true) + +opt expect=MapEqualityIntoJoinLeftAndRight +SELECT * FROM b, c, d WHERE b.x=c.x AND b.x=d.x +---- +inner-join (merge) + ├── columns: x:1(int!null) y:2(int) x:3(int!null) y:4(int!null) z:5(int!null) x:6(int!null) y:7(int!null) z:8(int!null) + ├── left ordering: +1 + ├── right ordering: +3 + ├── key: (6) + ├── fd: (1)-->(2), (3)-->(4,5), (6)-->(7,8), (3)==(1,6), (6)==(1,3), (1)==(3,6) + ├── scan b + │ ├── columns: b.x:1(int!null) b.y:2(int) + │ ├── key: (1) + │ ├── fd: (1)-->(2) + │ └── ordering: +1 + ├── inner-join (merge) + │ ├── columns: c.x:3(int!null) c.y:4(int!null) c.z:5(int!null) d.x:6(int!null) d.y:7(int!null) d.z:8(int!null) + │ ├── left ordering: +3 + │ ├── right ordering: +6 + │ ├── key: (6) + │ ├── fd: (3)-->(4,5), (6)-->(7,8), (3)==(6), (6)==(3) + │ ├── ordering: +(3|6) [actual: +3] + │ ├── scan c + │ │ ├── columns: c.x:3(int!null) c.y:4(int!null) c.z:5(int!null) + │ │ ├── key: (3) + │ │ ├── fd: (3)-->(4,5) + │ │ └── ordering: +3 + │ ├── scan d + │ │ ├── columns: d.x:6(int!null) d.y:7(int!null) d.z:8(int!null) + │ │ ├── key: (6) + │ │ ├── fd: (6)-->(7,8) + │ │ └── ordering: +6 + │ └── filters (true) + └── filters (true) + +opt expect=MapEqualityIntoJoinLeftAndRight +SELECT * FROM c INNER JOIN d ON c.x = d.x AND d.x = c.y AND c.y = d.y AND d.y = c.z AND c.z = d.z AND d.z = c.x +---- +inner-join (lookup d) + ├── columns: x:1(int!null) y:2(int!null) z:3(int!null) x:4(int!null) y:5(int!null) z:6(int!null) + ├── key columns: [1] = 
[4] + ├── key: (4) + ├── fd: (1)==(2-6), (2)==(1,3-6), (3)==(1,2,4-6), (4)==(1-3,5,6), (5)==(1-4,6), (6)==(1-5) + ├── select + │ ├── columns: c.x:1(int!null) c.y:2(int!null) c.z:3(int!null) + │ ├── key: (1) + │ ├── fd: (1)==(2,3), (2)==(1,3), (3)==(1,2) + │ ├── scan c + │ │ ├── columns: c.x:1(int!null) c.y:2(int!null) c.z:3(int!null) + │ │ ├── key: (1) + │ │ └── fd: (1)-->(2,3) + │ └── filters + │ ├── c.x = c.y [type=bool, outer=(1,2), constraints=(/1: (/NULL - ]; /2: (/NULL - ]), fd=(1)==(2), (2)==(1)] + │ └── c.x = c.z [type=bool, outer=(1,3), constraints=(/1: (/NULL - ]; /3: (/NULL - ]), fd=(1)==(3), (3)==(1)] + └── filters + ├── d.x = d.y [type=bool, outer=(4,5), constraints=(/4: (/NULL - ]; /5: (/NULL - ]), fd=(4)==(5), (5)==(4)] + └── d.x = d.z [type=bool, outer=(4,6), constraints=(/4: (/NULL - ]; /6: (/NULL - ]), fd=(4)==(6), (6)==(4)] + +opt expect=MapEqualityIntoJoinLeftAndRight +SELECT * from c, d WHERE c.x = c.y AND c.x = d.x AND c.y = d.y; +---- +inner-join (lookup d) + ├── columns: x:1(int!null) y:2(int!null) z:3(int!null) x:4(int!null) y:5(int!null) z:6(int!null) + ├── key columns: [1] = [4] + ├── key: (4) + ├── fd: (1)-->(3), (1)==(2,4,5), (2)==(1,4,5), (4)-->(6), (4)==(1,2,5), (5)==(1,2,4) + ├── select + │ ├── columns: c.x:1(int!null) c.y:2(int!null) c.z:3(int!null) + │ ├── key: (1) + │ ├── fd: (1)-->(3), (1)==(2), (2)==(1) + │ ├── scan c + │ │ ├── columns: c.x:1(int!null) c.y:2(int!null) c.z:3(int!null) + │ │ ├── key: (1) + │ │ └── fd: (1)-->(2,3) + │ └── filters + │ └── c.x = c.y [type=bool, outer=(1,2), constraints=(/1: (/NULL - ]; /2: (/NULL - ]), fd=(1)==(2), (2)==(1)] + └── filters + └── d.x = d.y [type=bool, outer=(4,5), constraints=(/4: (/NULL - ]; /5: (/NULL - ]), fd=(4)==(5), (5)==(4)] + +opt expect=MapEqualityIntoJoinLeftAndRight +SELECT * FROM c, d WHERE c.x = d.x AND d.x = c.y AND c.y = d.y +---- +inner-join (lookup d) + ├── columns: x:1(int!null) y:2(int!null) z:3(int!null) x:4(int!null) y:5(int!null) z:6(int!null) + ├── key columns: 
[1] = [4] + ├── key: (4) + ├── fd: (1)-->(3), (1)==(2,4,5), (2)==(1,4,5), (4)-->(6), (4)==(1,2,5), (5)==(1,2,4) + ├── select + │ ├── columns: c.x:1(int!null) c.y:2(int!null) c.z:3(int!null) + │ ├── key: (1) + │ ├── fd: (1)-->(3), (1)==(2), (2)==(1) + │ ├── scan c + │ │ ├── columns: c.x:1(int!null) c.y:2(int!null) c.z:3(int!null) + │ │ ├── key: (1) + │ │ └── fd: (1)-->(2,3) + │ └── filters + │ └── c.x = c.y [type=bool, outer=(1,2), constraints=(/1: (/NULL - ]; /2: (/NULL - ]), fd=(1)==(2), (2)==(1)] + └── filters + └── d.x = d.y [type=bool, outer=(4,5), constraints=(/4: (/NULL - ]; /5: (/NULL - ]), fd=(4)==(5), (5)==(4)] + +exec-ddl +create table aa (a int, a1 int, a2 int) +---- + +exec-ddl +create table bb (b int, b1 int, b2 int) +---- + +exec-ddl +create table cc (c int, c1 int, c2 int) +---- + +opt expect=MapEqualityIntoJoinLeftAndRight +select * from aa, bb where a2 = b and b = a and a = b1 and b1 = a1 +---- +inner-join (hash) + ├── columns: a:1(int!null) a1:2(int!null) a2:3(int!null) b:5(int!null) b1:6(int!null) b2:7(int) + ├── fd: (1)==(2,3,5,6), (2)==(1,3,5,6), (3)==(1,2,5,6), (5)==(1-3,6), (6)==(1-3,5) + ├── select + │ ├── columns: a:1(int!null) a1:2(int!null) a2:3(int!null) + │ ├── fd: (1)==(2,3), (2)==(1,3), (3)==(1,2) + │ ├── scan aa + │ │ └── columns: a:1(int) a1:2(int) a2:3(int) + │ └── filters + │ ├── a = a1 [type=bool, outer=(1,2), constraints=(/1: (/NULL - ]; /2: (/NULL - ]), fd=(1)==(2), (2)==(1)] + │ └── a = a2 [type=bool, outer=(1,3), constraints=(/1: (/NULL - ]; /3: (/NULL - ]), fd=(1)==(3), (3)==(1)] + ├── select + │ ├── columns: b:5(int!null) b1:6(int!null) b2:7(int) + │ ├── fd: (5)==(6), (6)==(5) + │ ├── scan bb + │ │ └── columns: b:5(int) b1:6(int) b2:7(int) + │ └── filters + │ └── b = b1 [type=bool, outer=(5,6), constraints=(/5: (/NULL - ]; /6: (/NULL - ]), fd=(5)==(6), (6)==(5)] + └── filters + └── a = b [type=bool, outer=(1,5), constraints=(/1: (/NULL - ]; /5: (/NULL - ]), fd=(1)==(5), (5)==(1)] + # 
-------------------------------------------------- # PushFilterIntoJoinLeft + PushFilterIntoJoinRight # -------------------------------------------------- @@ -1456,7 +1670,6 @@ inner-join (hash) │ ├── key: (6) │ └── fd: (6)-->(7-10) └── filters - ├── a.k = a2.k [type=bool, outer=(1,6), constraints=(/1: (/NULL - ]; /6: (/NULL - ]), fd=(1)==(6), (6)==(1)] ├── a.k = a2.k [type=bool, outer=(1,6), constraints=(/1: (/NULL - ]; /6: (/NULL - ]), fd=(1)==(6), (6)==(1)] └── a2.f = a.f [type=bool, outer=(3,8), constraints=(/3: (/NULL - ]; /8: (/NULL - ]), fd=(3)==(8), (8)==(3)] diff --git a/pkg/sql/opt/optbuilder/testdata/update_from b/pkg/sql/opt/optbuilder/testdata/update_from index c370ddfcf3e6..653e83f65932 100644 --- a/pkg/sql/opt/optbuilder/testdata/update_from +++ b/pkg/sql/opt/optbuilder/testdata/update_from @@ -235,32 +235,24 @@ update abc └── distinct-on ├── columns: abc.a:4(int!null) abc.b:5(int) abc.c:6(int) ab.a:7(int) ab.b:8(int) ab.rowid:9(int) ac.a:10(int) ac.c:11(int) ac.rowid:12(int) ├── grouping columns: abc.a:4(int!null) - ├── internal-ordering: +(4|7|10) - ├── inner-join (merge) + ├── inner-join (hash) │ ├── columns: abc.a:4(int!null) abc.b:5(int) abc.c:6(int) ab.a:7(int!null) ab.b:8(int) ab.rowid:9(int!null) ac.a:10(int!null) ac.c:11(int) ac.rowid:12(int!null) - │ ├── left ordering: +4 - │ ├── right ordering: +7 - │ ├── ordering: +(4|7|10) - │ ├── inner-join (merge) + │ ├── scan ab + │ │ └── columns: ab.a:7(int) ab.b:8(int) ab.rowid:9(int!null) + │ ├── inner-join (hash) │ │ ├── columns: abc.a:4(int!null) abc.b:5(int) abc.c:6(int) ac.a:10(int!null) ac.c:11(int) ac.rowid:12(int!null) - │ │ ├── left ordering: +4 - │ │ ├── right ordering: +10 - │ │ ├── ordering: +(4|10) + │ │ ├── scan ac + │ │ │ └── columns: ac.a:10(int) ac.c:11(int) ac.rowid:12(int!null) │ │ ├── scan abc - │ │ │ ├── columns: abc.a:4(int!null) abc.b:5(int) abc.c:6(int) - │ │ │ └── ordering: +4 - │ │ ├── sort - │ │ │ ├── columns: ac.a:10(int) ac.c:11(int) ac.rowid:12(int!null) - │ │ │ ├── 
ordering: +10 - │ │ │ └── scan ac - │ │ │ └── columns: ac.a:10(int) ac.c:11(int) ac.rowid:12(int!null) - │ │ └── filters (true) - │ ├── sort - │ │ ├── columns: ab.a:7(int) ab.b:8(int) ab.rowid:9(int!null) - │ │ ├── ordering: +7 - │ │ └── scan ab - │ │ └── columns: ab.a:7(int) ab.b:8(int) ab.rowid:9(int!null) - │ └── filters (true) + │ │ │ └── columns: abc.a:4(int!null) abc.b:5(int) abc.c:6(int) + │ │ └── filters + │ │ └── eq [type=bool] + │ │ ├── variable: abc.a [type=int] + │ │ └── variable: ac.a [type=int] + │ └── filters + │ └── eq [type=bool] + │ ├── variable: ab.a [type=int] + │ └── variable: abc.a [type=int] └── aggregations ├── first-agg [type=int] │ └── variable: abc.b [type=int] @@ -303,22 +295,22 @@ update abc ├── grouping columns: abc.a:4(int!null) ├── inner-join (hash) │ ├── columns: abc.a:4(int!null) abc.b:5(int) abc.c:6(int) ab.a:7(int!null) ab.b:8(int) ac.a:10(int!null) ac.c:11(int) - │ ├── scan ac - │ │ └── columns: ac.a:10(int) ac.c:11(int) + │ ├── scan ab + │ │ └── columns: ab.a:7(int) ab.b:8(int) │ ├── inner-join (hash) - │ │ ├── columns: abc.a:4(int!null) abc.b:5(int) abc.c:6(int) ab.a:7(int!null) ab.b:8(int) - │ │ ├── scan ab - │ │ │ └── columns: ab.a:7(int) ab.b:8(int) + │ │ ├── columns: abc.a:4(int!null) abc.b:5(int) abc.c:6(int) ac.a:10(int!null) ac.c:11(int) + │ │ ├── scan ac + │ │ │ └── columns: ac.a:10(int) ac.c:11(int) │ │ ├── scan abc │ │ │ └── columns: abc.a:4(int!null) abc.b:5(int) abc.c:6(int) │ │ └── filters │ │ └── eq [type=bool] │ │ ├── variable: abc.a [type=int] - │ │ └── variable: ab.a [type=int] + │ │ └── variable: ac.a [type=int] │ └── filters │ └── eq [type=bool] │ ├── variable: ab.a [type=int] - │ └── variable: ac.a [type=int] + │ └── variable: abc.a [type=int] └── aggregations ├── first-agg [type=int] │ └── variable: abc.b [type=int] @@ -354,32 +346,24 @@ update abc └── distinct-on ├── columns: abc.a:4(int!null) abc.b:5(int) abc.c:6(int) ab.a:7(int) ab.b:8(int) ab.rowid:9(int) ac.a:10(int) ac.c:11(int) ac.rowid:12(int) ├── 
grouping columns: abc.a:4(int!null) - ├── internal-ordering: +(4|7|10) - ├── inner-join (merge) + ├── inner-join (hash) │ ├── columns: abc.a:4(int!null) abc.b:5(int) abc.c:6(int) ab.a:7(int!null) ab.b:8(int) ab.rowid:9(int!null) ac.a:10(int!null) ac.c:11(int) ac.rowid:12(int!null) - │ ├── left ordering: +4 - │ ├── right ordering: +7 - │ ├── ordering: +(4|7|10) - │ ├── inner-join (merge) + │ ├── scan ab + │ │ └── columns: ab.a:7(int) ab.b:8(int) ab.rowid:9(int!null) + │ ├── inner-join (hash) │ │ ├── columns: abc.a:4(int!null) abc.b:5(int) abc.c:6(int) ac.a:10(int!null) ac.c:11(int) ac.rowid:12(int!null) - │ │ ├── left ordering: +4 - │ │ ├── right ordering: +10 - │ │ ├── ordering: +(4|10) + │ │ ├── scan ac + │ │ │ └── columns: ac.a:10(int) ac.c:11(int) ac.rowid:12(int!null) │ │ ├── scan abc - │ │ │ ├── columns: abc.a:4(int!null) abc.b:5(int) abc.c:6(int) - │ │ │ └── ordering: +4 - │ │ ├── sort - │ │ │ ├── columns: ac.a:10(int) ac.c:11(int) ac.rowid:12(int!null) - │ │ │ ├── ordering: +10 - │ │ │ └── scan ac - │ │ │ └── columns: ac.a:10(int) ac.c:11(int) ac.rowid:12(int!null) - │ │ └── filters (true) - │ ├── sort - │ │ ├── columns: ab.a:7(int) ab.b:8(int) ab.rowid:9(int!null) - │ │ ├── ordering: +7 - │ │ └── scan ab - │ │ └── columns: ab.a:7(int) ab.b:8(int) ab.rowid:9(int!null) - │ └── filters (true) + │ │ │ └── columns: abc.a:4(int!null) abc.b:5(int) abc.c:6(int) + │ │ └── filters + │ │ └── eq [type=bool] + │ │ ├── variable: abc.a [type=int] + │ │ └── variable: ac.a [type=int] + │ └── filters + │ └── eq [type=bool] + │ ├── variable: ab.a [type=int] + │ └── variable: abc.a [type=int] └── aggregations ├── first-agg [type=int] │ └── variable: abc.b [type=int] diff --git a/pkg/sql/opt/xform/rules/join.opt b/pkg/sql/opt/xform/rules/join.opt index 4c993fcf11c4..507536a503da 100644 --- a/pkg/sql/opt/xform/rules/join.opt +++ b/pkg/sql/opt/xform/rules/join.opt @@ -188,14 +188,20 @@ (InnerJoin $innerRight $right - (ExtractBoundConditions $on (OutputCols2 $innerRight $right)) + 
(SortFilters + (ExtractBoundConditions + $newOn:(MapJoinOpEqualities + (ConcatFilters $on $innerOn) + (OutputCols $innerLeft) + $cols:(OutputCols2 $innerRight $right) + ) + $cols + ) + ) (EmptyJoinPrivate) ) (SortFilters - (ConcatFilters - (ExtractUnboundConditions $on (OutputCols2 $innerRight $right)) - $innerOn - ) + (ExtractUnboundConditions $newOn $cols) ) (EmptyJoinPrivate) ) diff --git a/pkg/sql/opt/xform/testdata/rules/join b/pkg/sql/opt/xform/testdata/rules/join index d77fe01de0ba..270e8fbb01e8 100644 --- a/pkg/sql/opt/xform/testdata/rules/join +++ b/pkg/sql/opt/xform/testdata/rules/join @@ -713,19 +713,19 @@ memo (optimized, ~8KB, required=[presentation: a:1,b:2,c:3,k:5]) opt SELECT * FROM abc JOIN xyz ON a=x AND a=y ---- -inner-join (merge) +inner-join (lookup abc@ab) ├── columns: a:1(int!null) b:2(int) c:3(int) x:5(int!null) y:6(int!null) z:7(int) - ├── left ordering: +1 - ├── right ordering: +5 - ├── fd: (1)==(5,6), (5)==(1,6), (6)==(1,5) - ├── scan abc@ab - │ ├── columns: a:1(int) b:2(int) c:3(int) - │ └── ordering: +1 - ├── scan xyz@xy - │ ├── columns: x:5(int) y:6(int) z:7(int) - │ └── ordering: +5 - └── filters - └── a = y [type=bool, outer=(1,6), constraints=(/1: (/NULL - ]; /6: (/NULL - ]), fd=(1)==(6), (6)==(1)] + ├── key columns: [5] = [1] + ├── fd: (5)==(1,6), (6)==(1,5), (1)==(5,6) + ├── select + │ ├── columns: x:5(int!null) y:6(int!null) z:7(int) + │ ├── fd: (5)==(6), (6)==(5) + │ ├── scan xyz@xy + │ │ ├── columns: x:5(int!null) y:6(int) z:7(int) + │ │ └── constraint: /5/6/8: (/NULL - ] + │ └── filters + │ └── x = y [type=bool, outer=(5,6), constraints=(/5: (/NULL - ]; /6: (/NULL - ]), fd=(5)==(6), (6)==(5)] + └── filters (true) # Verify multiple merge-joins can be chained. 
opt @@ -946,49 +946,34 @@ GenerateLookupJoins Source expression: inner-join (hash) ├── columns: a:1(int!null) b:2(int) c:3(int) x:5(int!null) y:6(int!null) z:7(int) - ├── fd: (1)==(5,6), (5)==(1,6), (6)==(1,5) - ├── scan abc - │ └── columns: a:1(int) b:2(int) c:3(int) - ├── scan xyz - │ └── columns: x:5(int) y:6(int) z:7(int) - └── filters - ├── a = x [type=bool, outer=(1,5), constraints=(/1: (/NULL - ]; /5: (/NULL - ]), fd=(1)==(5), (5)==(1)] - └── a = y [type=bool, outer=(1,6), constraints=(/1: (/NULL - ]; /6: (/NULL - ]), fd=(1)==(6), (6)==(1)] - -New expression 1 of 1: - inner-join (lookup xyz@xy) - ├── columns: a:1(int!null) b:2(int) c:3(int) x:5(int!null) y:6(int!null) z:7(int) - ├── key columns: [1] = [5] - ├── fd: (1)==(5,6), (5)==(1,6), (6)==(1,5) - ├── scan abc - │ └── columns: a:1(int) b:2(int) c:3(int) - └── filters - └── a = y [type=bool, outer=(1,6), constraints=(/1: (/NULL - ]; /6: (/NULL - ]), fd=(1)==(6), (6)==(1)] - -================================================================================ -GenerateLookupJoins -================================================================================ -Source expression: - inner-join (hash) - ├── columns: a:1(int!null) b:2(int) c:3(int) x:5(int!null) y:6(int!null) z:7(int) - ├── fd: (1)==(5,6), (5)==(1,6), (6)==(1,5) - ├── scan xyz - │ └── columns: x:5(int) y:6(int) z:7(int) + ├── fd: (5)==(1,6), (6)==(1,5), (1)==(5,6) + ├── select + │ ├── columns: x:5(int!null) y:6(int!null) z:7(int) + │ ├── fd: (5)==(6), (6)==(5) + │ ├── scan xyz@xy + │ │ ├── columns: x:5(int!null) y:6(int) z:7(int) + │ │ └── constraint: /5/6/8: (/NULL - ] + │ └── filters + │ └── x = y [type=bool, outer=(5,6), constraints=(/5: (/NULL - ]; /6: (/NULL - ]), fd=(5)==(6), (6)==(5)] ├── scan abc │ └── columns: a:1(int) b:2(int) c:3(int) └── filters - ├── a = x [type=bool, outer=(1,5), constraints=(/1: (/NULL - ]; /5: (/NULL - ]), fd=(1)==(5), (5)==(1)] - └── a = y [type=bool, outer=(1,6), constraints=(/1: (/NULL - ]; /6: (/NULL - ]), 
fd=(1)==(6), (6)==(1)] + └── a = x [type=bool, outer=(1,5), constraints=(/1: (/NULL - ]; /5: (/NULL - ]), fd=(1)==(5), (5)==(1)] New expression 1 of 1: inner-join (lookup abc@ab) ├── columns: a:1(int!null) b:2(int) c:3(int) x:5(int!null) y:6(int!null) z:7(int) ├── key columns: [5] = [1] - ├── fd: (1)==(5,6), (5)==(1,6), (6)==(1,5) - ├── scan xyz - │ └── columns: x:5(int) y:6(int) z:7(int) - └── filters - └── a = y [type=bool, outer=(1,6), constraints=(/1: (/NULL - ]; /6: (/NULL - ]), fd=(1)==(6), (6)==(1)] + ├── fd: (5)==(1,6), (6)==(1,5), (1)==(5,6) + ├── select + │ ├── columns: x:5(int!null) y:6(int!null) z:7(int) + │ ├── fd: (5)==(6), (6)==(5) + │ ├── scan xyz@xy + │ │ ├── columns: x:5(int!null) y:6(int) z:7(int) + │ │ └── constraint: /5/6/8: (/NULL - ] + │ └── filters + │ └── x = y [type=bool, outer=(5,6), constraints=(/5: (/NULL - ]; /6: (/NULL - ]), fd=(5)==(6), (6)==(5)] + └── filters (true) ---- ---- @@ -2516,3 +2501,300 @@ project │ ├── constraint: /5/3: [/'2019-01-01' - /'2019-01-01'] │ └── fd: ()-->(5) └── filters (true) + +# -------------------------------------------------- +# AssociateJoin +# -------------------------------------------------- + +exec-ddl +ALTER TABLE abc INJECT STATISTICS '[ + { + "columns": ["a"], + "created_at": "2018-05-01 1:00:00.00000+00:00", + "row_count": 1000, + "distinct_count": 1000 + } +]' +---- + +exec-ddl +ALTER TABLE stu INJECT STATISTICS '[ + { + "columns": ["s"], + "created_at": "2018-05-01 1:00:00.00000+00:00", + "row_count": 10000, + "distinct_count": 1000 + } +]' +---- + +exec-ddl +ALTER TABLE xyz INJECT STATISTICS '[ + { + "columns": ["x"], + "created_at": "2018-05-01 1:00:00.00000+00:00", + "row_count": 1000, + "distinct_count": 1000 + } +]' +---- + +# Check that the equality condition abc.a = xyz.x is synthesized. 
+opt expect=AssociateJoin +SELECT * FROM abc, stu, xyz WHERE abc.a=stu.s AND stu.s=xyz.x +---- +inner-join (merge) + ├── columns: a:1(int!null) b:2(int) c:3(int) s:5(int!null) t:6(int!null) u:7(int!null) x:8(int!null) y:9(int) z:10(int) + ├── left ordering: +5 + ├── right ordering: +1 + ├── fd: (5)==(1,8), (8)==(1,5), (1)==(5,8) + ├── scan stu + │ ├── columns: s:5(int!null) t:6(int!null) u:7(int!null) + │ ├── key: (5-7) + │ └── ordering: +5 + ├── inner-join (merge) + │ ├── columns: a:1(int!null) b:2(int) c:3(int) x:8(int!null) y:9(int) z:10(int) + │ ├── left ordering: +8 + │ ├── right ordering: +1 + │ ├── fd: (1)==(8), (8)==(1) + │ ├── ordering: +(1|8) [actual: +8] + │ ├── scan xyz@xy + │ │ ├── columns: x:8(int) y:9(int) z:10(int) + │ │ └── ordering: +8 + │ ├── scan abc@ab + │ │ ├── columns: a:1(int) b:2(int) c:3(int) + │ │ └── ordering: +1 + │ └── filters (true) + └── filters (true) + +memo expect=AssociateJoin +SELECT * FROM abc, stu, xyz WHERE abc.a=stu.s AND stu.s=xyz.x +---- +memo (optimized, ~36KB, required=[presentation: a:1,b:2,c:3,s:5,t:6,u:7,x:8,y:9,z:10]) + ├── G1: (inner-join G2 G3 G4) (inner-join G3 G2 G4) (merge-join G2 G3 G5 inner-join,+1,+5) (merge-join G3 G2 G5 inner-join,+5,+1) (lookup-join G3 G5 abc@ab,keyCols=[5],outCols=(1-3,5-10)) (inner-join G6 G7 G8) (inner-join G9 G10 G11) (inner-join G7 G6 G8) (merge-join G6 G7 G5 inner-join,+5,+1) (inner-join G10 G9 G11) (merge-join G9 G10 G5 inner-join,+8,+5) (merge-join G7 G6 G5 inner-join,+1,+5) (lookup-join G7 G5 stu,keyCols=[1],outCols=(1-3,5-10)) (inner-join G9 G12 G13) (merge-join G10 G9 G5 inner-join,+5,+8) (lookup-join G10 G5 xyz@xy,keyCols=[5],outCols=(1-3,5-10)) (inner-join G12 G9 G13) (merge-join G9 G12 G5 inner-join,+8,+1) (merge-join G12 G9 G5 inner-join,+1,+8) (lookup-join G12 G5 xyz@xy,keyCols=[1],outCols=(1-3,5-10)) + │ └── [presentation: a:1,b:2,c:3,s:5,t:6,u:7,x:8,y:9,z:10] + │ ├── best: (merge-join G6="[ordering: +5]" G7="[ordering: +(1|8)]" G5 inner-join,+5,+1) + │ └── cost: 12980.08 
+ ├── G2: (scan abc,cols=(1-3)) (scan abc@ab,cols=(1-3)) (scan abc@bc,cols=(1-3)) + │ ├── [ordering: +1] + │ │ ├── best: (scan abc@ab,cols=(1-3)) + │ │ └── cost: 1070.02 + │ └── [] + │ ├── best: (scan abc,cols=(1-3)) + │ └── cost: 1070.02 + ├── G3: (inner-join G6 G9 G11) (inner-join G9 G6 G11) (merge-join G6 G9 G5 inner-join,+5,+8) (lookup-join G6 G5 xyz@xy,keyCols=[5],outCols=(5-10)) (merge-join G9 G6 G5 inner-join,+8,+5) (lookup-join G9 G5 stu,keyCols=[8],outCols=(5-10)) + │ ├── [ordering: +(5|8)] + │ │ ├── best: (merge-join G6="[ordering: +5]" G9="[ordering: +8]" G5 inner-join,+5,+8) + │ │ └── cost: 11880.05 + │ └── [] + │ ├── best: (merge-join G6="[ordering: +5]" G9="[ordering: +8]" G5 inner-join,+5,+8) + │ └── cost: 11880.05 + ├── G4: (filters G14) + ├── G5: (filters) + ├── G6: (scan stu) (scan stu@uts) + │ ├── [ordering: +5] + │ │ ├── best: (scan stu) + │ │ └── cost: 10600.02 + │ └── [] + │ ├── best: (scan stu) + │ └── cost: 10600.02 + ├── G7: (inner-join G9 G2 G13) (inner-join G2 G9 G13) (merge-join G9 G2 G5 inner-join,+8,+1) (lookup-join G9 G5 abc@ab,keyCols=[8],outCols=(1-3,8-10)) (merge-join G2 G9 G5 inner-join,+1,+8) (lookup-join G2 G5 xyz@xy,keyCols=[1],outCols=(1-3,8-10)) + │ ├── [ordering: +(1|8)] + │ │ ├── best: (merge-join G9="[ordering: +8]" G2="[ordering: +1]" G5 inner-join,+8,+1) + │ │ └── cost: 2170.05 + │ └── [] + │ ├── best: (merge-join G9="[ordering: +8]" G2="[ordering: +1]" G5 inner-join,+8,+1) + │ └── cost: 2170.05 + ├── G8: (filters G15) + ├── G9: (scan xyz,cols=(8-10)) (scan xyz@xy,cols=(8-10)) (scan xyz@yz,cols=(8-10)) + │ ├── [ordering: +8] + │ │ ├── best: (scan xyz@xy,cols=(8-10)) + │ │ └── cost: 1070.02 + │ └── [] + │ ├── best: (scan xyz,cols=(8-10)) + │ └── cost: 1070.02 + ├── G10: (inner-join G6 G2 G4) (inner-join G2 G6 G4) (merge-join G6 G2 G5 inner-join,+5,+1) (lookup-join G6 G5 abc@ab,keyCols=[5],outCols=(1-3,5-7)) (merge-join G2 G6 G5 inner-join,+1,+5) (lookup-join G2 G5 stu,keyCols=[1],outCols=(1-3,5-7)) + │ ├── [ordering: 
+(1|5)] + │ │ ├── best: (merge-join G6="[ordering: +5]" G2="[ordering: +1]" G5 inner-join,+5,+1) + │ │ └── cost: 11880.05 + │ └── [] + │ ├── best: (merge-join G6="[ordering: +5]" G2="[ordering: +1]" G5 inner-join,+5,+1) + │ └── cost: 11880.05 + ├── G11: (filters G16) + ├── G12: (inner-join G2 G6 G8) (inner-join G6 G2 G8) + │ ├── [ordering: +(1|5)] + │ │ ├── best: (sort G12) + │ │ └── cost: 14770.10 + │ └── [] + │ ├── best: (inner-join G6 G2 G8) + │ └── cost: 11912.55 + ├── G13: (filters G17) + ├── G14: (eq G18 G19) + ├── G15: (eq G19 G18) + ├── G16: (eq G19 G20) + ├── G17: (eq G18 G20) + ├── G18: (variable a) + ├── G19: (variable s) + └── G20: (variable x) + +# Regression test for #36226. +exec-ddl +CREATE TABLE parent1 (pid1 INT PRIMARY KEY, pa1 INT) +---- + +exec-ddl +CREATE TABLE child1 ( + pid1 INT, + cid1 INT, + ca1 INT, + PRIMARY KEY(pid1, cid1) +) +INTERLEAVE IN PARENT parent1 (pid1) +---- + +exec-ddl +CREATE TABLE grandchild1 ( + pid1 INT, + cid1 INT, + gcid1 INT, + gca1 INT, + PRIMARY KEY(pid1, cid1, gcid1) +) +INTERLEAVE IN PARENT child1 (pid1, cid1) +---- + +opt expect=AssociateJoin join-limit=4 +SELECT * FROM grandchild1 +JOIN child1 USING (pid1, cid1) +JOIN parent1 USING (pid1) +ORDER BY pid1 +---- +project + ├── columns: pid1:1(int!null) cid1:2(int!null) gcid1:3(int!null) gca1:4(int) ca1:7(int) pa1:9(int) + ├── key: (1-3) + ├── fd: (1-3)-->(4), (1,2)-->(7), (1)-->(9) + ├── ordering: +1 + └── inner-join (lookup parent1) + ├── columns: grandchild1.pid1:1(int!null) grandchild1.cid1:2(int!null) gcid1:3(int!null) gca1:4(int) child1.pid1:5(int!null) child1.cid1:6(int!null) ca1:7(int) parent1.pid1:8(int!null) pa1:9(int) + ├── key columns: [1] = [8] + ├── key: (3,6,8) + ├── fd: (1-3)-->(4), (5,6)-->(7), (1)==(5,8), (5)==(1,8), (2)==(6), (6)==(2), (8)-->(9), (8)==(1,5) + ├── ordering: +(1|5|8) [actual: +1] + ├── inner-join (merge) + │ ├── columns: grandchild1.pid1:1(int!null) grandchild1.cid1:2(int!null) gcid1:3(int!null) gca1:4(int) child1.pid1:5(int!null) 
child1.cid1:6(int!null) ca1:7(int) + │ ├── left ordering: +1,+2 + │ ├── right ordering: +5,+6 + │ ├── key: (3,5,6) + │ ├── fd: (1-3)-->(4), (5,6)-->(7), (1)==(5), (5)==(1), (2)==(6), (6)==(2) + │ ├── ordering: +(1|5) [actual: +1] + │ ├── scan grandchild1 + │ │ ├── columns: grandchild1.pid1:1(int!null) grandchild1.cid1:2(int!null) gcid1:3(int!null) gca1:4(int) + │ │ ├── key: (1-3) + │ │ ├── fd: (1-3)-->(4) + │ │ └── ordering: +1,+2 + │ ├── scan child1 + │ │ ├── columns: child1.pid1:5(int!null) child1.cid1:6(int!null) ca1:7(int) + │ │ ├── key: (5,6) + │ │ ├── fd: (5,6)-->(7) + │ │ └── ordering: +5,+6 + │ └── filters (true) + └── filters (true) + +memo expect=AssociateJoin join-limit=4 +SELECT * FROM grandchild1 +JOIN child1 USING (pid1, cid1) +JOIN parent1 USING (pid1) +ORDER BY pid1 +---- +memo (optimized, ~31KB, required=[presentation: pid1:1,cid1:2,gcid1:3,gca1:4,ca1:7,pa1:9] [ordering: +1]) + ├── G1: (project G2 G3 pid1 cid1 gcid1 gca1 ca1 pa1) + │ ├── [presentation: pid1:1,cid1:2,gcid1:3,gca1:4,ca1:7,pa1:9] [ordering: +1] + │ │ ├── best: (project G2="[ordering: +(1|5|8)]" G3 pid1 cid1 gcid1 gca1 ca1 pa1) + │ │ └── cost: 2766.07 + │ └── [] + │ ├── best: (project G2 G3 pid1 cid1 gcid1 gca1 ca1 pa1) + │ └── cost: 2766.07 + ├── G2: (inner-join G4 G5 G6) (inner-join G5 G4 G6) (merge-join G4 G5 G7 inner-join,+1,+8) (lookup-join G4 G7 parent1,keyCols=[1],outCols=(1-9)) (inner-join G8 G9 G10) (inner-join G11 G12 G10) (merge-join G5 G4 G7 inner-join,+8,+1) (inner-join G9 G8 G10) (merge-join G8 G9 G7 inner-join,+1,+2,+5,+6) (inner-join G12 G11 G10) (merge-join G11 G12 G7 inner-join,+5,+6,+1,+2) (merge-join G9 G8 G7 inner-join,+5,+6,+1,+2) (lookup-join G9 G7 grandchild1,keyCols=[5 6],outCols=(1-9)) (inner-join G11 G12 G13) (inner-join G5 G4 G14) (merge-join G12 G11 G7 inner-join,+1,+2,+5,+6) (lookup-join G12 G7 child1,keyCols=[1 2],outCols=(1-9)) (inner-join G12 G11 G13) (inner-join G4 G5 G14) (merge-join G5 G4 G7 inner-join,+8,+5) (inner-join G5 G15 G6) (merge-join G4 G5 
G7 inner-join,+5,+8) (lookup-join G4 G7 parent1,keyCols=[5],outCols=(1-9)) (inner-join G15 G5 G6) (merge-join G5 G15 G7 inner-join,+8,+1) (merge-join G15 G5 G7 inner-join,+1,+8) (lookup-join G15 G7 parent1,keyCols=[1],outCols=(1-9)) + │ ├── [ordering: +(1|5|8)] + │ │ ├── best: (lookup-join G4="[ordering: +(1|5)]" G7 parent1,keyCols=[1],outCols=(1-9)) + │ │ └── cost: 2765.06 + │ └── [] + │ ├── best: (lookup-join G4 G7 parent1,keyCols=[1],outCols=(1-9)) + │ └── cost: 2765.06 + ├── G3: (projections) + ├── G4: (inner-join G8 G11 G10) (inner-join G11 G8 G10) (merge-join G8 G11 G7 inner-join,+1,+2,+5,+6) (lookup-join G8 G7 child1,keyCols=[1 2],outCols=(1-7)) (merge-join G11 G8 G7 inner-join,+5,+6,+1,+2) (lookup-join G11 G7 grandchild1,keyCols=[5 6],outCols=(1-7)) + │ ├── [ordering: +(1|5)] + │ │ ├── best: (merge-join G8="[ordering: +1,+2]" G11="[ordering: +5,+6]" G7 inner-join,+1,+2,+5,+6) + │ │ └── cost: 2161.05 + │ └── [] + │ ├── best: (merge-join G8="[ordering: +1,+2]" G11="[ordering: +5,+6]" G7 inner-join,+1,+2,+5,+6) + │ └── cost: 2161.05 + ├── G5: (scan parent1) + │ ├── [ordering: +8] + │ │ ├── best: (scan parent1) + │ │ └── cost: 1040.02 + │ └── [] + │ ├── best: (scan parent1) + │ └── cost: 1040.02 + ├── G6: (filters G16) + ├── G7: (filters) + ├── G8: (scan grandchild1) + │ ├── [ordering: +1,+2] + │ │ ├── best: (scan grandchild1) + │ │ └── cost: 1080.02 + │ ├── [ordering: +1] + │ │ ├── best: (scan grandchild1) + │ │ └── cost: 1080.02 + │ └── [] + │ ├── best: (scan grandchild1) + │ └── cost: 1080.02 + ├── G9: (inner-join G11 G5 G14) (inner-join G5 G11 G14) (merge-join G11 G5 G7 inner-join,+5,+8) (lookup-join G11 G7 parent1,keyCols=[5],outCols=(5-9)) (merge-join G5 G11 G7 inner-join,+8,+5) (lookup-join G5 G7 child1,keyCols=[8],outCols=(5-9)) + │ ├── [ordering: +(5|8),+6] + │ │ ├── best: (sort G9) + │ │ └── cost: 2360.34 + │ ├── [ordering: +(5|8)] + │ │ ├── best: (merge-join G11="[ordering: +5]" G5="[ordering: +8]" G7 inner-join,+5,+8) + │ │ └── cost: 2130.05 + │ └── 
[] + │ ├── best: (merge-join G11="[ordering: +5]" G5="[ordering: +8]" G7 inner-join,+5,+8) + │ └── cost: 2130.05 + ├── G10: (filters G17 G18) + ├── G11: (scan child1) + │ ├── [ordering: +5,+6] + │ │ ├── best: (scan child1) + │ │ └── cost: 1060.02 + │ ├── [ordering: +5] + │ │ ├── best: (scan child1) + │ │ └── cost: 1060.02 + │ └── [] + │ ├── best: (scan child1) + │ └── cost: 1060.02 + ├── G12: (inner-join G8 G5 G6) (inner-join G5 G8 G6) (merge-join G8 G5 G7 inner-join,+1,+8) (lookup-join G8 G7 parent1,keyCols=[1],outCols=(1-4,8,9)) (merge-join G5 G8 G7 inner-join,+8,+1) (lookup-join G5 G7 grandchild1,keyCols=[8],outCols=(1-4,8,9)) + │ ├── [ordering: +(1|8),+2] + │ │ ├── best: (sort G12) + │ │ └── cost: 2380.34 + │ ├── [ordering: +(1|8)] + │ │ ├── best: (merge-join G8="[ordering: +1]" G5="[ordering: +8]" G7 inner-join,+1,+8) + │ │ └── cost: 2150.05 + │ └── [] + │ ├── best: (merge-join G8="[ordering: +1]" G5="[ordering: +8]" G7 inner-join,+1,+8) + │ └── cost: 2150.05 + ├── G13: (filters G18 G19) + ├── G14: (filters G20) + ├── G15: (inner-join G8 G11 G13) (inner-join G11 G8 G13) + │ ├── [ordering: +(1|5)] + │ │ ├── best: (sort G15) + │ │ └── cost: 2186.35 + │ └── [] + │ ├── best: (inner-join G8 G11 G13) + │ └── cost: 2171.05 + ├── G16: (eq G21 G22) + ├── G17: (eq G21 G23) + ├── G18: (eq G24 G25) + ├── G19: (eq G23 G21) + ├── G20: (eq G23 G22) + ├── G21: (variable grandchild1.pid1) + ├── G22: (variable parent1.pid1) + ├── G23: (variable child1.pid1) + ├── G24: (variable grandchild1.cid1) + └── G25: (variable child1.cid1)