Merge cockroachdb#73565

73565: indexrec: improve ordering recommendations r=nehageorge a=nehageorge Previously, in order to avoid redundant recommendations, we standardized the index candidate creation for *memo.SortExprs, or ordering candidates. More specifically, if the recommendation query contained `ORDER BY k DESC, i ASC`, we would create an index candidate with the key (k, i DESC), allowing for a reverse scan of the index. Since reverse scans are less efficient than forward scans, this is not ideal. In this commit, we no longer standardize ordering recommendations. We always create the sort candidate with the same column order and directions as in the query. There is no handling for redundant recommendations (meaning recommending an index on both `(k ASC)` and `(k DESC)`). Since queries usually only have one `ORDER BY` clause, it doesn't seem like an important case to consider. Plus, through experimentation, it seems that when there are redundant candidates (only possible for single-column indexes), there is no redundant recommendation. See tests added in this PR. Fixes: cockroachdb#73451. Release note: None Co-authored-by: Neha George <neha.george@cockroachlabs.com>
mgartner · Dec 16, 2021 · d05ec98 · d05ec98
2 parents 66c3ff0 + ab1a6cd
commit d05ec98
Show file tree

Hide file tree

Showing 12 changed files with 117 additions and 58 deletions.
diff --git a/pkg/sql/opt/exec/execbuilder/testdata/distinct_on b/pkg/sql/opt/exec/execbuilder/testdata/distinct_on
@@ -313,7 +313,7 @@ vectorized: true
 ·
 index recommendations: 1
 1. type: index creation
-   SQL command: CREATE INDEX ON xyz (x);
+   SQL command: CREATE INDEX ON xyz (x DESC);
 
 query T
 EXPLAIN (VERBOSE) SELECT DISTINCT ON (x, z) y, z, x FROM xyz ORDER BY z

diff --git a/pkg/sql/opt/exec/execbuilder/testdata/insert b/pkg/sql/opt/exec/execbuilder/testdata/insert
@@ -336,7 +336,7 @@ vectorized: true
 ·
 index recommendations: 1
 1. type: index creation
-   SQL command: CREATE INDEX ON select_t (v) STORING (x);
+   SQL command: CREATE INDEX ON select_t (v DESC) STORING (x);
 
 # Check that INSERT supports LIMIT (MySQL extension)
 query T

diff --git a/pkg/sql/opt/exec/execbuilder/testdata/limit b/pkg/sql/opt/exec/execbuilder/testdata/limit
@@ -146,6 +146,10 @@ vectorized: true
       table: t@t_v_idx
       spans: LIMITED SCAN
       limit: 6
+·
+index recommendations: 1
+1. type: index creation
+   SQL command: CREATE INDEX ON t (v DESC);
 
 query T
 EXPLAIN (VERBOSE) SELECT sum(w) FROM t GROUP BY k, v ORDER BY v DESC LIMIT 10

diff --git a/pkg/sql/opt/exec/execbuilder/testdata/orderby b/pkg/sql/opt/exec/execbuilder/testdata/orderby
@@ -42,7 +42,7 @@ vectorized: true
 ·
 index recommendations: 1
 1. type: index creation
-   SQL command: CREATE INDEX ON t (b);
+   SQL command: CREATE INDEX ON t (b DESC);
 
 # TODO(radu): Should set "strategy top 2" on sort node
 query T
@@ -1125,7 +1125,7 @@ vectorized: true
 ·
 index recommendations: 1
 1. type: index creation
-   SQL command: CREATE INDEX ON kv (v, k DESC);
+   SQL command: CREATE INDEX ON kv (v DESC, k);
 
 query T
 EXPLAIN (VERBOSE) SELECT k FROM kv ORDER BY INDEX kv@foo ASC
@@ -1145,7 +1145,7 @@ vectorized: true
 ·
 index recommendations: 1
 1. type: index creation
-   SQL command: CREATE INDEX ON kv (v, k DESC);
+   SQL command: CREATE INDEX ON kv (v DESC, k);
 
 query T
 EXPLAIN (VERBOSE) SELECT k FROM kv ORDER BY INDEX kv@foo DESC
@@ -1185,7 +1185,7 @@ vectorized: true
 ·
 index recommendations: 1
 1. type: index creation
-   SQL command: CREATE INDEX ON kv (v, k DESC);
+   SQL command: CREATE INDEX ON kv (v DESC, k);
 
 # Check the syntax can be used with joins.
 #
@@ -1262,6 +1262,10 @@ vectorized: true
               estimated row count: 1,000 (missing stats)
               table: kv@foo
               spans: FULL SCAN
+·
+index recommendations: 1
+1. type: index creation
+   SQL command: CREATE INDEX ON kv (v DESC, k);
 
 statement ok
 CREATE TABLE xyz (x INT, y INT, z INT, INDEX(z,y))
@@ -1400,7 +1404,7 @@ vectorized: true
 ·
 index recommendations: 1
 1. type: index creation
-   SQL command: CREATE INDEX ON t (b);
+   SQL command: CREATE INDEX ON t (b DESC);
 
 query T
 EXPLAIN (VERBOSE) SELECT a, b FROM t ORDER BY b DESC NULLS FIRST, c NULLS LAST

diff --git a/pkg/sql/opt/exec/execbuilder/testdata/select b/pkg/sql/opt/exec/execbuilder/testdata/select
@@ -1450,6 +1450,10 @@ vectorized: true
           estimated row count: 1,000 (missing stats)
           table: abcd@abcd_pkey
           spans: FULL SCAN
+·
+index recommendations: 1
+1. type: index creation
+   SQL command: CREATE INDEX ON abcd (a DESC, b DESC) STORING (c, d);
 
 query T
 EXPLAIN (VERBOSE) SELECT * FROM (SELECT a, b FROM abcd LIMIT 10) WHERE a > b ORDER BY a

diff --git a/pkg/sql/opt/exec/execbuilder/testdata/select_index b/pkg/sql/opt/exec/execbuilder/testdata/select_index
@@ -854,7 +854,7 @@ vectorized: true
 ·
 index recommendations: 1
 1. type: index creation
-   SQL command: CREATE INDEX ON abz (c);
+   SQL command: CREATE INDEX ON abz (c DESC);
 
 # Issue #14426: verify we don't have an internal filter that contains "a IN ()"
 # (which causes an error in DistSQL due to expression serialization).
@@ -927,8 +927,8 @@ vectorized: true
       limit: 20
 ·
 index recommendations: 1
-1. type: index replacement
-   SQL commands: DROP INDEX test2@test2_k_key; CREATE INDEX ON test2 (k) STORING (v);
+1. type: index creation
+   SQL command: CREATE INDEX ON test2 (k DESC) STORING (v);
 
 # The result output of this test requires that vectorized execution
 # is not used, so it has been moved to select_index_vectorize_off.
@@ -1490,8 +1490,8 @@ vectorized: true
           spans: FULL SCAN
 ·
 index recommendations: 1
-1. type: index replacement
-   SQL commands: DROP INDEX noncover@c; CREATE INDEX ON noncover (c) STORING (b, d);
+1. type: index creation
+   SQL command: CREATE INDEX ON noncover (c DESC) STORING (b, d);
 
 query T
 EXPLAIN (VERBOSE) SELECT * FROM noncover WHERE c > 0 ORDER BY c

diff --git a/pkg/sql/opt/exec/execbuilder/testdata/select_index_flags b/pkg/sql/opt/exec/execbuilder/testdata/select_index_flags
@@ -34,6 +34,10 @@ vectorized: true
   missing stats
   table: abcd@abcd_pkey
   spans: [/20 - /30]
+·
+index recommendations: 1
+1. type: index creation
+   SQL command: CREATE INDEX ON abcd (a DESC) STORING (b, c, d);
 
 # Force primary
 query T
@@ -139,8 +143,8 @@ vectorized: true
       limit: 5
 ·
 index recommendations: 1
-1. type: index replacement
-   SQL commands: DROP INDEX abcd@b; CREATE INDEX ON abcd (b) STORING (c, d);
+1. type: index creation
+   SQL command: CREATE INDEX ON abcd (b DESC) STORING (c, d);
 
 # Force index b, reverse scan.
 query T
@@ -159,8 +163,8 @@ vectorized: true
       limit: 5
 ·
 index recommendations: 1
-1. type: index replacement
-   SQL commands: DROP INDEX abcd@b; CREATE INDEX ON abcd (b) STORING (c, d);
+1. type: index creation
+   SQL command: CREATE INDEX ON abcd (b DESC) STORING (c, d);
 
 
 # Force index b, forward scan.
@@ -183,8 +187,8 @@ vectorized: true
           spans: FULL SCAN
 ·
 index recommendations: 1
-1. type: index replacement
-   SQL commands: DROP INDEX abcd@b; CREATE INDEX ON abcd (b) STORING (c, d);
+1. type: index creation
+   SQL command: CREATE INDEX ON abcd (b DESC) STORING (c, d);
 
 # Force index cd
 query T

diff --git a/pkg/sql/opt/exec/execbuilder/testdata/topk b/pkg/sql/opt/exec/execbuilder/testdata/topk
@@ -75,7 +75,7 @@ vectorized: true
 ·
 index recommendations: 1
 1. type: index creation
-   SQL command: CREATE INDEX ON t (w) STORING (v);
+   SQL command: CREATE INDEX ON t (w DESC) STORING (v);
 
 # TopK added in subquery.
 query T

diff --git a/pkg/sql/opt/exec/execbuilder/testdata/update b/pkg/sql/opt/exec/execbuilder/testdata/update
@@ -346,7 +346,7 @@ vectorized: true
 ·
 index recommendations: 1
 1. type: index creation
-   SQL command: CREATE INDEX ON kv (v);
+   SQL command: CREATE INDEX ON kv (v DESC);
 
 # Use case for UPDATE ... ORDER BY: renumbering a PK without unique violation.
 query T

diff --git a/pkg/sql/opt/exec/execbuilder/testdata/upsert b/pkg/sql/opt/exec/execbuilder/testdata/upsert
@@ -37,7 +37,7 @@ vectorized: true
 ·
 index recommendations: 1
 1. type: index creation
-   SQL command: CREATE INDEX ON kv (v);
+   SQL command: CREATE INDEX ON kv (v DESC);
 
 # Use explicit target columns (which can use blind KV Put).
 query T
@@ -70,7 +70,7 @@ vectorized: true
 ·
 index recommendations: 1
 1. type: index creation
-   SQL command: CREATE INDEX ON kv (v);
+   SQL command: CREATE INDEX ON kv (v DESC);
 
 # Add RETURNING clause (should still use blind KV Put).
 query T
@@ -103,7 +103,7 @@ vectorized: true
 ·
 index recommendations: 1
 1. type: index creation
-   SQL command: CREATE INDEX ON kv (v);
+   SQL command: CREATE INDEX ON kv (v DESC);
 
 # Use subset of explicit target columns (which cannot use blind KV Put).
 query T
@@ -154,7 +154,7 @@ vectorized: true
 ·
 index recommendations: 1
 1. type: index creation
-   SQL command: CREATE INDEX ON kv (v);
+   SQL command: CREATE INDEX ON kv (v DESC);
 
 # Use Upsert with indexed table, default columns, computed columns, and check
 # columns.

diff --git a/pkg/sql/opt/indexrec/index_candidate_set.go b/pkg/sql/opt/indexrec/index_candidate_set.go
@@ -24,10 +24,8 @@ import (
 //
 // 	1. Add a single index on all columns in a Group By or Order By expression if
 //	   the columns are from the same table. Otherwise, group expressions into
-//	   indexes by table. For Order By, the first column of each index will be
-//     ascending. If that is the opposite of the column's ordering, each
-//     subsequent column will also be ordered opposite to its ordering (and vice
-//     versa).
+//	   indexes by table. For Order By, the index column ordering and column
+//     directions are the same as how it is in the Order By.
 //  2. Add a single-column index on any Range expression, comparison
 //     expression (=, <, >, <=, >=), and IS expression.
 // 	3. Add a single-column index on any column that appears in a JOIN predicate.
@@ -163,30 +161,18 @@ func (ics *indexCandidateSet) addSetOperationIndexes(leftCols, rightCols opt.Col
 
 // addOrderingIndex adds indexes for a *memo.SortExpr. One index is constructed
 // per table, with a column corresponding to each of the table's columns in the
-// sort, in order of appearance. The first column of each table's index will be
-// ordered ascending. If that matches the column's actual sort ordering (it's
-// ascending), then each subsequent index column will also be ordered the same
-// way it is in the sort. However, if the first column's ordering in the sort is
-// actually descending, then each subsequent column in the index will also be
-// ordered opposite to its ordering in the sort. This will allow the index to be
-// useful for scans or reverse scans.
-//
-// TODO(neha): The convention of having the first column being ascending is to
-// avoid redundant indexes. However, since reverse scans are slightly less
-// efficient than forward scans, we shouldn't have this convention and should
-// remove redundant indexes later.
+// sort, in order of appearance. For example, if we have ORDER BY k DESC, i ASC,
+// where k and i come from the same table, the index candidate's key columns
+// would be (k DESC, i ASC).
 func (ics indexCandidateSet) addOrderingIndex(ordering opt.Ordering) {
 	if len(ordering) == 0 {
 		return
 	}
 	columnList := make(opt.ColList, 0, len(ordering))
 	descList := make([]bool, 0, len(ordering))
-	numTables := len(ics.md.AllTables())
-	reverseOrder := make(map[cat.Table]bool, numTables)
 
 	for _, orderingCol := range ordering {
 		colID := orderingCol.ID()
-
 		tabID := ics.md.ColumnMeta(colID).Table
 
 		// Do not add indexes on columns with no base table.
@@ -195,17 +181,7 @@ func (ics indexCandidateSet) addOrderingIndex(ordering opt.Ordering) {
 		}
 
 		columnList = append(columnList, colID)
-		colTable := ics.md.Table(tabID)
-
-		// Set descending bool for ordering column.
-		if _, found := reverseOrder[colTable]; !found {
-			reverseOrder[colTable] = orderingCol.Descending()
-		}
-		if reverseOrder[colTable] {
-			descList = append(descList, orderingCol.Ascending())
-		} else {
-			descList = append(descList, orderingCol.Descending())
-		}
+		descList = append(descList, orderingCol.Descending())
 	}
 	if len(columnList) > 0 {
 		addMultiColumnIndex(columnList, descList, ics.md, ics.overallCandidates)

diff --git a/pkg/sql/opt/indexrec/testdata/index-candidates-recommendations b/pkg/sql/opt/indexrec/testdata/index-candidates-recommendations
@@ -386,21 +386,88 @@ index-candidates
 SELECT k, i FROM t1 ORDER BY k DESC, i ASC
 ----
 t1:
- (k, i DESC)
+ (k DESC, i)
 
 index-recommendations
 SELECT k, i FROM t1 ORDER BY k DESC, i ASC
 ----
 index recommendations: 1
 1. type: index creation
-   SQL command: CREATE INDEX ON t1 (k, i DESC);
+   SQL command: CREATE INDEX ON t1 (k DESC, i);
 --
 Optimal Plan.
-scan t1@_hyp_1,rev
+scan t1@_hyp_1
  ├── columns: k:1 i:2
- ├── cost: 1195.37442
+ ├── cost: 1094.72
  └── ordering: -1,+2
 
+# Only one index candidate is created with nested ORDER BY clauses.
+index-candidates
+SELECT * FROM (
+  SELECT k, i FROM t1
+  ORDER BY k DESC, i ASC
+)
+ORDER BY k ASC, i DESC
+----
+t1:
+ (k, i DESC)
+
+index-recommendations
+SELECT * FROM (
+  SELECT k, i FROM t1
+  ORDER BY k DESC, i ASC
+)
+ORDER BY k ASC, i DESC
+----
+index recommendations: 1
+1. type: index creation
+   SQL command: CREATE INDEX ON t1 (k, i DESC);
+--
+Optimal Plan.
+scan t1@_hyp_1
+ ├── columns: k:1 i:2
+ ├── cost: 1094.72
+ └── ordering: +1,-2
+
+# Redundant index candidates are created but only one index is recommended.
+index-candidates
+SELECT k FROM t1 WHERE k > 3
+UNION ALL
+SELECT k FROM t1 WHERE k < 10
+ORDER BY k DESC
+----
+t1:
+ (k DESC)
+ (k)
+
+index-recommendations
+SELECT k FROM t1 WHERE k > 3
+UNION ALL
+SELECT k FROM t1 WHERE k < 10
+ORDER BY k DESC
+----
+index recommendations: 1
+1. type: index creation
+   SQL command: CREATE INDEX ON t1 (k DESC);
+--
+Optimal Plan.
+union-all
+ ├── columns: k:15!null
+ ├── left columns: t1.k:1
+ ├── right columns: t1.k:8
+ ├── cost: 741.393333
+ ├── ordering: -15
+ ├── scan t1@_hyp_1
+ │    ├── columns: t1.k:1!null
+ │    ├── constraint: /-1/5: [ - /4]
+ │    ├── cost: 367.353333
+ │    └── ordering: -1
+ └── scan t1@_hyp_1
+      ├── columns: t1.k:8!null
+      ├── constraint: /-8/12: [/9 - /NULL)
+      ├── cost: 367.353333
+      └── ordering: -8
+
 index-candidates
 SELECT count(*) FROM t1 GROUP BY k
 ----