opt: calculate number of rows processed when costing lookup and merge…

… joins This commit updates the costing of both lookup joins and merge joins to take into account the number of rows processed by the operator. This number may be larger than the number of output rows if an additional filter is applied as part of the ON condition that is not used to determine equality columns for the join. For example, consider the query `SELECT * FROM abc JOIN def ON a = e AND b = 3;` Assuming there is no index on b, if a lookup join is used to execute this query, the number of rows processed is actually the same as the query `SELECT * FROM abc JOIN def ON a = e;` The difference is that the filter b=3 must also be applied to every row in the first query. The coster now takes this into account when determining the cost of lookup joins and merge joins. Informs #34810 Release note: None
cockroachdb · Aug 27, 2019 · 32d9973 · 32d9973
1 parent aecfa10
commit 32d9973
Show file tree

Hide file tree

Showing 16 changed files with 728 additions and 605 deletions.
diff --git a/pkg/sql/opt/exec/execbuilder/testdata/lookup_join b/pkg/sql/opt/exec/execbuilder/testdata/lookup_join
@@ -217,25 +217,25 @@ ALTER TABLE authors INJECT STATISTICS '[
 query TTTTT colnames
 EXPLAIN (VERBOSE) SELECT DISTINCT authors.name FROM books AS b1, books2 AS b2, authors WHERE b1.title = b2.title AND authors.book = b1.title AND b1.shelf <> b2.shelf
 ----
-tree                        field        description        columns                                   ordering
-distinct                    ·            ·                  (name)                                    weak-key(name)
- │                          distinct on  name               ·                                         ·
- └── render                 ·            ·                  (name)                                    ·
-      │                     render 0     name               ·                                         ·
-      └── hash-join         ·            ·                  (title, shelf, title, shelf, name, book)  ·
-           │                type         inner              ·                                         ·
-           │                equality     (title) = (book)   ·                                         ·
-           ├── lookup-join  ·            ·                  (title, shelf, title, shelf)              ·
-           │    │           table        books2@primary     ·                                         ·
-           │    │           type         inner              ·                                         ·
-           │    │           equality     (title) = (title)  ·                                         ·
-           │    │           pred         @2 != @4           ·                                         ·
-           │    └── scan    ·            ·                  (title, shelf)                            ·
-           │                table        books@primary      ·                                         ·
-           │                spans        ALL                ·                                         ·
-           └── scan         ·            ·                  (name, book)                              ·
-·                           table        authors@primary    ·                                         ·
-·                           spans        ALL                ·                                         ·
+tree                      field        description        columns                                   ordering
+distinct                  ·            ·                  (name)                                    weak-key(name)
+ │                        distinct on  name               ·                                         ·
+ └── render               ·            ·                  (name)                                    ·
+      │                   render 0     name               ·                                         ·
+      └── lookup-join     ·            ·                  (name, book, title, shelf, title, shelf)  ·
+           │              table        books2@primary     ·                                         ·
+           │              type         inner              ·                                         ·
+           │              equality     (title) = (title)  ·                                         ·
+           │              pred         @4 != @6           ·                                         ·
+           └── hash-join  ·            ·                  (name, book, title, shelf)                ·
+                │         type         inner              ·                                         ·
+                │         equality     (book) = (title)   ·                                         ·
+                ├── scan  ·            ·                  (name, book)                              ·
+                │         table        authors@primary    ·                                         ·
+                │         spans        ALL                ·                                         ·
+                └── scan  ·            ·                  (title, shelf)                            ·
+·                         table        books@primary      ·                                         ·
+·                         spans        ALL                ·                                         ·
 
 # Verify data placement.
 query TTTI colnames
@@ -249,7 +249,7 @@ NULL       NULL     {5}       5
 query T
 SELECT url FROM [EXPLAIN (DISTSQL) SELECT DISTINCT authors.name FROM books AS b1, books2 AS b2, authors WHERE b1.title = b2.title AND authors.book = b1.title AND b1.shelf <> b2.shelf]
 ----
-https://cockroachdb.github.io/distsqlplan/decode.html#eJyck89q3DAQh-99CndOCSjYku0cDAEdemhK2ZS0t7IHxZruqnE0RpKhJey7F9mlWZu10s1Rf76Zb362nsGSxo16Qg_Nd-DAoIYtg95Ri96Ti9vTpVv9C5qCgbH9EOL2lkFLDqF5hmBCh9DAhq6oz2tgoDEo043XDgxoCC-QD2qH0Fwf2FFhni78TT10eI9Ko8uLWXnonXlS7rd8IHr0wOBrr6xvsitgcDeEJpOcyRLWLPg5Fp_I2L8SPCEhgMFnosehz36SsRnZaBF9NpkU2fubTNbHdoLJksl61VG8NSlxWlINYR-_7OmsxKpHuerx0n6w5DQ61Mt_4PUrJ4b5qPw-ho4uL-ezdPgjXEh-eePMbh8upLj8N8N6kNU5QX4wPhjbhryad5Z8tX49q__KW7lH35P1-F-PpYjpoN7hlLanwbX4xVE7tpmWdyM3bmj0YTq9nha3djqKgscwT8IiDYskXM1gvoTLM2CxhKskXKe16yRcLODt4d2fAAAA__8CwK1z
+https://cockroachdb.github.io/distsqlplan/decode.html#eJyck89q3DAQh-99CnVOCSjsyn9SEAR06KEpZbekvZU9KNZ0V42jMZIMLWHfvcgubGxipZujpPlG3_xsPYEjgxv9iAHkDxDAoYYdh85TgyGQT9tj0a35DXLNwbquj2l7x6EhjyCfINrYIkjY0BV1qxo4GIzatkPZkQP18QSFqPcI8vrInzUW-cbf9X2Ld6gN-tV60h46bx-1_6N0Hw_Jl8O3Trsg2RVw2PZRMiW4KmDJQ7zVQ7zscU_0sGRRLloUixany3tH3qBHM8_39ZIXRvmkw-EzWYd-VUwnafFnvFDF5Y23-0O8UOJykiRXJVfV4iTlOXkmgX9xlpk4k-AXooe-Y7_IOkZOMpWA7Yapir2_YerDyXFRrTpH7aMN0bomrqqpWKZ_Pen_ylu5w9CRC_hfj2WdviCaPY5_RKDeN_jVUzNcMy63AzdsGAxxPL0eF7duPEqCz2GRhcsJLOZwcQZczOEyC1d57SoL13m4zsLrGbw7vvsbAAD__5P3rXQ=
 
 query TTTTT colnames
 EXPLAIN (VERBOSE) SELECT a.name FROM authors AS a JOIN books2 AS b2 ON a.book = b2.title ORDER BY a.name

diff --git a/pkg/sql/opt/memo/logical_props_builder.go b/pkg/sql/opt/memo/logical_props_builder.go
@@ -1586,6 +1586,14 @@ func ensureLookupJoinInputProps(join *LookupJoinExpr, sb *statisticsBuilder) *pr
 	if relational.OutputCols.Empty() {
 		md := join.Memo().Metadata()
 		relational.OutputCols = join.Cols.Difference(join.Input.Relational().OutputCols)
+
+		// Include the key columns in the output columns.
+		index := md.Table(join.Table).Index(join.Index)
+		for i := range join.KeyCols {
+			indexColID := join.Table.ColumnID(index.Column(i).Ordinal)
+			relational.OutputCols.Add(indexColID)
+		}
+
 		relational.NotNullCols = tableNotNullCols(md, join.Table)
 		relational.NotNullCols.IntersectionWith(relational.OutputCols)
 		relational.Cardinality = props.AnyCardinality
@@ -1679,6 +1687,8 @@ type joinPropsHelper struct {
 	filterNotNullCols opt.ColSet
 	filterIsTrue      bool
 	filterIsFalse     bool
+
+	selfJoinCols opt.ColSet
 }
 
 func (h *joinPropsHelper) init(b *logicalPropsBuilder, joinExpr RelExpr) {
@@ -1702,6 +1712,10 @@ func (h *joinPropsHelper) init(b *logicalPropsBuilder, joinExpr RelExpr) {
 			h.filterNotNullCols.Add(colID)
 			h.filterNotNullCols.Add(indexColID)
 			h.filtersFD.AddEquivalency(colID, indexColID)
+			if colID == indexColID {
+				// This can happen if an index join was converted into a lookup join.
+				h.selfJoinCols.Add(colID)
+			}
 		}
 
 		// Lookup join has implicit equality conditions on KeyCols.

diff --git a/pkg/sql/opt/memo/memo.go b/pkg/sql/opt/memo/memo.go
@@ -358,6 +358,18 @@ func (m *Memo) RequestColStat(
 	return nil, false
 }
 
+// RowsProcessed calculates and returns the number of rows processed by the
+// relational expression. It is currently only supported for lookup joins and
+// merge joins.
+func (m *Memo) RowsProcessed(expr RelExpr) (_ float64, ok bool) {
+	// When SetRoot is called, the statistics builder may have been cleared.
+	// If this happens, we can't serve the request anymore.
+	if m.logPropsBuilder.sb.md != nil {
+		return m.logPropsBuilder.sb.rowsProcessed(expr), true
+	}
+	return 0, false
+}
+
 // NextWithID returns a not-yet-assigned identifier for a WITH expression.
 func (m *Memo) NextWithID() opt.WithID {
 	m.curWithID++

diff --git a/pkg/sql/opt/memo/statistics_builder.go b/pkg/sql/opt/memo/statistics_builder.go
@@ -259,6 +259,7 @@ func (sb *statisticsBuilder) colStatFromInput(colSet opt.ColSet, e RelExpr) *pro
 		} else {
 			leftProps = e.Child(0).(RelExpr).Relational()
 		}
+
 		intersectsLeft := leftProps.OutputCols.Intersects(colSet)
 		var intersectsRight bool
 		if lookupJoin != nil {
@@ -268,13 +269,11 @@ func (sb *statisticsBuilder) colStatFromInput(colSet opt.ColSet, e RelExpr) *pro
 		} else {
 			intersectsRight = e.Child(1).(RelExpr).Relational().OutputCols.Intersects(colSet)
 		}
+
+		// It's possible that colSet intersects both left and right if we have a
+		// lookup join that was converted from an index join, so check the left
+		// side first.
 		if intersectsLeft {
-			if intersectsRight {
-				// TODO(radu): what if both sides have columns in colSet?
-				panic(errors.AssertionFailedf(
-					"colSet %v contains both left and right columns", log.Safe(colSet),
-				))
-			}
 			if zigzagJoin != nil {
 				return sb.colStatTable(zigzagJoin.LeftTable, colSet)
 			}
@@ -901,7 +900,13 @@ func (sb *statisticsBuilder) buildJoin(
 
 	// Calculate selectivity and row count
 	// -----------------------------------
-	s.RowCount = leftStats.RowCount * rightStats.RowCount
+	if h.rightProps.FuncDeps.ColsAreStrictKey(h.selfJoinCols) {
+		// This is like an index join.
+		s.RowCount = leftStats.RowCount
+	} else {
+		s.RowCount = leftStats.RowCount * rightStats.RowCount
+		equivReps.UnionWith(h.selfJoinCols)
+	}
 	inputRowCount := s.RowCount
 	s.ApplySelectivity(sb.selectivityFromDistinctCounts(constrainedCols, join, s))
 	s.ApplySelectivity(sb.selectivityFromEquivalencies(equivReps, &h.filtersFD, join, s))
@@ -2271,6 +2276,39 @@ func (sb *statisticsBuilder) shouldUseHistogram(relProps *props.Relational) bool
 	return relProps.Cardinality.Max >= minCardinalityForHistogram
 }
 
+// rowsProcessed calculates and returns the number of rows processed by the
+// relational expression. It is currently only supported for lookup joins and
+// merge joins.
+func (sb *statisticsBuilder) rowsProcessed(e RelExpr) float64 {
+	switch t := e.(type) {
+	case *LookupJoinExpr:
+		if t.On.IsTrue() {
+			// If there are no additional ON filters, the number of rows processed
+			// equals the number of output rows.
+			return e.Relational().Stats.RowCount
+		}
+
+		// Otherwise, we need to determine the row count of the join before the
+		// ON conditions are applied.
+		withoutOn := e.Memo().MemoizeLookupJoin(t.Input, nil /* on */, &t.LookupJoinPrivate)
+		return withoutOn.Relational().Stats.RowCount
+
+	case *MergeJoinExpr:
+		if t.On.IsTrue() {
+			// If there are no additional ON filters, the number of rows processed
+			// equals the number of output rows.
+			return e.Relational().Stats.RowCount
+		}
+
+		// Otherwise, we need to determine the row count of the join before the
+		// ON conditions are applied.
+		withoutOn := e.Memo().MemoizeMergeJoin(t.Left, t.Right, nil /* on */, &t.MergeJoinPrivate)
+		return withoutOn.Relational().Stats.RowCount
+	}
+
+	panic(errors.AssertionFailedf("rowsProcessed not supported for operator type %v", log.Safe(e.Op())))
+}
+
 func min(a float64, b float64) float64 {
 	if a < b {
 		return a

diff --git a/pkg/sql/opt/memo/testdata/stats/lookup-join b/pkg/sql/opt/memo/testdata/stats/lookup-join
@@ -135,7 +135,7 @@ CREATE TABLE abc (a INT, b INT, c INT, PRIMARY KEY (a, c))
 ----
 
 exec-ddl
-CREATE TABLE def (d INT, e INT, f INT, g FLOAT, PRIMARY KEY (f, e), INDEX e_idx (e) STORING (d))
+CREATE TABLE def (d INT, e INT, f INT, g FLOAT, PRIMARY KEY (f, e), INDEX e_idx (e) STORING (d), INDEX d_idx (d))
 ----
 
 # Set up the statistics as if the first table is much smaller than the second.
@@ -152,6 +152,12 @@ ALTER TABLE abc INJECT STATISTICS '[
 
 exec-ddl
 ALTER TABLE def INJECT STATISTICS '[
+  {
+    "columns": ["d"],
+    "created_at": "2018-01-01 1:00:00.00000+00:00",
+    "row_count": 10000,
+    "distinct_count": 1000
+  },
   {
     "columns": ["e"],
     "created_at": "2018-01-01 1:00:00.00000+00:00",
@@ -216,7 +222,7 @@ SELECT a, b, c, d, e, f FROM abc JOIN DEF ON a = f
 inner-join (lookup def)
  ├── columns: a:1(int!null) b:2(int) c:3(int!null) d:4(int) e:5(int!null) f:6(int!null)
  ├── key columns: [1] = [6]
- ├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(2)=9.99954623, null(2)=1, distinct(3)=9.99954623, null(3)=0, distinct(4)=95.1671064, null(4)=1, distinct(5)=63.2138954, null(5)=0, distinct(6)=100, null(6)=0, distinct(2,5,6)=100, null(2,5,6)=1]
+ ├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(2)=9.99954623, null(2)=1, distinct(3)=9.99954623, null(3)=0, distinct(4)=95.1671064, null(4)=0, distinct(5)=63.2138954, null(5)=0, distinct(6)=100, null(6)=0, distinct(2,5,6)=100, null(2,5,6)=1]
  ├── key: (3,5,6)
  ├── fd: (1,3)-->(2), (5,6)-->(4), (1)==(6), (6)==(1)
  ├── scan abc
@@ -228,18 +234,18 @@ inner-join (lookup def)
 
 # Check column statistics for double lookup join.
 opt colstat=7
-SELECT * FROM abc LEFT JOIN DEF ON a = e AND b = 3
+SELECT * FROM abc LEFT JOIN DEF ON a = d AND b = 3
 ----
 left-join (lookup def)
  ├── columns: a:1(int!null) b:2(int) c:3(int!null) d:4(int) e:5(int) f:6(int) g:7(float)
  ├── key columns: [6 5] = [6 5]
- ├── stats: [rows=1000, distinct(5)=100, null(5)=0, distinct(7)=632.304575, null(7)=10]
+ ├── stats: [rows=100, distinct(4)=100, null(4)=0, distinct(7)=95.1671064, null(7)=1]
  ├── key: (1,3,5,6)
  ├── fd: (1,3)-->(2), (5,6)-->(4,7)
- ├── left-join (lookup def@e_idx)
+ ├── left-join (lookup def@d_idx)
  │    ├── columns: a:1(int!null) b:2(int) c:3(int!null) d:4(int) e:5(int) f:6(int)
- │    ├── key columns: [1] = [5]
- │    ├── stats: [rows=1000, distinct(5)=100, null(5)=0]
+ │    ├── key columns: [1] = [4]
+ │    ├── stats: [rows=100, distinct(4)=100, null(4)=0]
  │    ├── key: (1,3,5,6)
  │    ├── fd: (1,3)-->(2), (5,6)-->(4)
  │    ├── scan abc
@@ -250,3 +256,27 @@ left-join (lookup def)
  │    └── filters
  │         └── b = 3 [type=bool, outer=(2), constraints=(/2: [/3 - /3]; tight), fd=()-->(2)]
  └── filters (true)
+
+# The filter a=e is not very selective, so we do not expect a lookup join, even
+# though there is an additional filter.
+opt colstat=7
+SELECT * FROM abc LEFT JOIN DEF ON a = e AND b = 3
+----
+right-join (hash)
+ ├── columns: a:1(int!null) b:2(int) c:3(int!null) d:4(int) e:5(int) f:6(int) g:7(float)
+ ├── stats: [rows=1000, distinct(5)=100, null(5)=0, distinct(7)=632.304575, null(7)=10]
+ ├── key: (1,3,5,6)
+ ├── fd: (1,3)-->(2), (5,6)-->(4,7)
+ ├── scan def
+ │    ├── columns: d:4(int) e:5(int!null) f:6(int!null) g:7(float)
+ │    ├── stats: [rows=10000, distinct(5)=100, null(5)=0, distinct(7)=1000, null(7)=100]
+ │    ├── key: (5,6)
+ │    └── fd: (5,6)-->(4,7)
+ ├── scan abc
+ │    ├── columns: a:1(int!null) b:2(int) c:3(int!null)
+ │    ├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(2)=10, null(2)=1, distinct(3)=10, null(3)=0]
+ │    ├── key: (1,3)
+ │    └── fd: (1,3)-->(2)
+ └── filters
+      ├── a = e [type=bool, outer=(1,5), constraints=(/1: (/NULL - ]; /5: (/NULL - ]), fd=(1)==(5), (5)==(1)]
+      └── b = 3 [type=bool, outer=(2), constraints=(/2: [/3 - /3]; tight), fd=()-->(2)]
diff --git a/pkg/sql/opt/memo/testdata/stats_quality/tpcc b/pkg/sql/opt/memo/testdata/stats_quality/tpcc
@@ -614,7 +614,7 @@ scalar-group-by
  ├── columns: count:28(int)
  ├── cardinality: [1 - 1]
  ├── stats: [rows=1, distinct(28)=1, null(28)=0]
- ├── cost: 1436.73853
+ ├── cost: 1370.66482
  ├── key: ()
  ├── fd: ()-->(28)
  ├── prune: (28)
@@ -623,7 +623,7 @@ scalar-group-by
  │    ├── columns: ol_o_id:1(int!null) ol_d_id:2(int!null) ol_w_id:3(int!null) ol_i_id:5(int!null) s_i_id:11(int!null) s_w_id:12(int!null) s_quantity:13(int!null)
  │    ├── key columns: [3 5] = [12 11]
  │    ├── stats: [rows=216.137889, distinct(1)=19.9995949, null(1)=0, distinct(2)=1, null(2)=0, distinct(3)=1, null(3)=0, distinct(5)=185.570315, null(5)=0, distinct(11)=185.570315, null(11)=0, distinct(12)=1, null(12)=0, distinct(13)=30.3089364, null(13)=0]
- │    ├── cost: 1434.55715
+ │    ├── cost: 1368.48344
  │    ├── fd: ()-->(2,3,12), (11)-->(13), (5)==(11), (11)==(5), (3)==(12), (12)==(3)
  │    ├── interesting orderings: (+3,+2,-1)
  │    ├── scan order_line
@@ -714,7 +714,7 @@ scalar-group-by
  ├── columns: count:22(int)
  ├── cardinality: [1 - 1]
  ├── stats: [rows=1, distinct(22)=1, null(22)=0]
- ├── cost: 126.546667
+ ├── cost: 126.613333
  ├── key: ()
  ├── fd: ()-->(22)
  ├── prune: (22)
@@ -724,7 +724,7 @@ scalar-group-by
  │    ├── left ordering: +1
  │    ├── right ordering: +11
  │    ├── stats: [rows=3.33333333, distinct(1)=3.33333333, null(1)=0, distinct(9)=1, null(9)=0, distinct(11)=3.33333333, null(11)=0, distinct(21)=3.33333333, null(21)=0]
- │    ├── cost: 126.493333
+ │    ├── cost: 126.56
  │    ├── key: (11)
  │    ├── fd: (1)-->(9), (11)-->(21), (1)==(11), (11)==(1)
  │    ├── scan warehouse

diff --git a/pkg/sql/opt/optgen/exprgen/testdata/join b/pkg/sql/opt/optgen/exprgen/testdata/join
@@ -47,8 +47,8 @@ expr
 left-join (lookup abc@ab)
  ├── columns: t.public.abc.a:5(int) t.public.abc.b:6(int)
  ├── key columns: [5] = [5]
- ├── stats: [rows=333333.333, distinct(5)=100, null(5)=3333.33333]
- ├── cost: 691726.707
+ ├── stats: [rows=3333.33333, distinct(5)=100, null(5)=33.3333333]
+ ├── cost: 25660.04
  ├── scan t.public.def
  │    ├── columns: t.public.def.d:1(int) t.public.def.e:2(int)
  │    ├── stats: [rows=1000, distinct(2)=100, null(2)=10]