Skip to content

Commit

Permalink
opt: generate lookup joins with CHECK constraints and computed columns
Browse files Browse the repository at this point in the history
Previously, only explicit filters were used to generate lookup join key
columns. Now lookup join keys can be generated from CHECK constraints
and computed column expressions.

With this commit and the previous commit, lookup joins on partitioned
indexes are explored by the optimizer.

Release note (performance improvement): The query optimizer now explores
plans with lookup joins on partitioned indexes, resulting in more
efficient query plans in some cases.
  • Loading branch information
mgartner committed Dec 9, 2020
1 parent f00daca commit 1a3fd72
Show file tree
Hide file tree
Showing 11 changed files with 336 additions and 148 deletions.
4 changes: 2 additions & 2 deletions pkg/sql/opt/memo/testdata/stats/lookup-join
Original file line number Diff line number Diff line change
Expand Up @@ -406,14 +406,14 @@ inner-join (lookup wxyz)
│ ├── stats: [rows=19.8, distinct(1)=19.8, null(1)=0, distinct(6)=19.8, null(6)=0, distinct(7)=1, null(7)=0, distinct(11)=1, null(11)=0]
│ ├── fd: ()-->(7), (9)-->(6,8), (1)==(6), (6)==(1)
│ ├── project
│ │ ├── columns: "project_const_col_@7":11(int!null) m:1(int) n:2(int)
│ │ ├── columns: "lookup_join_const_col_@7":11(int!null) m:1(int) n:2(int)
│ │ ├── stats: [rows=40, distinct(1)=40, null(1)=0, distinct(11)=1, null(11)=0]
│ │ ├── fd: ()-->(11)
│ │ ├── scan medium
│ │ │ ├── columns: m:1(int) n:2(int)
│ │ │ └── stats: [rows=40, distinct(1)=40, null(1)=0]
│ │ └── projections
│ │ └── 10 [as="project_const_col_@7":11, type=int]
│ │ └── 10 [as="lookup_join_const_col_@7":11, type=int]
│ └── filters (true)
└── filters (true)

Expand Down
74 changes: 74 additions & 0 deletions pkg/sql/opt/xform/general_funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -154,3 +154,77 @@ func (c *CustomFuncs) initIdxConstraintForIndex(
)
return ic
}

// computedColFilters generates all filters that can be derived from the list of
// computed column expressions from the given table. A computed column can be
// used as a filter when it has a constant value. That is true when:
//
// 1. All other columns it references are constant, because other filters in
// the query constrain them to be so.
// 2. All functions in the computed column expression can be folded into
// constants (i.e. they do not have problematic side effects).
//
// Note that computed columns can depend on other computed columns; in general
// the dependencies form an acyclic directed graph. computedColFilters will
// return filters for all constant computed columns, regardless of the order of
// their dependencies.
//
// As with checkConstraintFilters, computedColFilters do not really filter any
// rows, they are rather facts or guarantees about the data. Treating them as
// filters may allow some indexes to be constrained and used. Consider the
// following example:
//
// CREATE TABLE t (
// k INT NOT NULL,
// hash INT AS (k % 4) STORED,
// PRIMARY KEY (hash, k)
// )
//
// SELECT * FROM t WHERE k = 5
//
// Notice that the filter provided explicitly wouldn't allow the optimizer to
// seek using the primary index (it would have to fall back to a table scan).
// However, column "hash" can be proven to have the constant value of 1, since
// it's dependent on column "k", which has the constant value of 5. This enables
// usage of the primary index:
//
// scan t
// ├── columns: k:1(int!null) hash:2(int!null)
// ├── constraint: /2/1: [/1/5 - /1/5]
// ├── key: (2)
// └── fd: ()-->(1)
//
// The values of both columns in that index are known, enabling a single value
// constraint to be generated.
func (c *CustomFuncs) computedColFilters(
	tabID opt.TableID, requiredFilters, optionalFilters memo.FiltersExpr,
) memo.FiltersExpr {
	md := c.e.mem.Metadata().TableMeta(tabID)
	if len(md.ComputedCols) == 0 {
		// Without computed columns there is nothing to derive.
		return nil
	}

	// Seed the set of known-constant columns from both filter lists; the
	// required filters are scanned before the optional ones.
	known := make(map[opt.ColumnID]opt.ScalarExpr)
	c.findConstantFilterCols(known, tabID, requiredFilters)
	c.findConstantFilterCols(known, tabID, optionalFilters)
	if len(known) == 0 {
		// The filters pin no column to a constant, so assume that no computed
		// column can be constant either.
		return nil
	}

	// Emit one filter per computed column that folds to a constant, i.e. whose
	// variables are all present in the known set.
	var derived memo.FiltersExpr
	for id := range md.ComputedCols {
		if !c.tryFoldComputedCol(md, id, known) {
			continue
		}
		// After a successful fold the column's constant value is read back
		// from the known set.
		val := known[id]
		// Use IS rather than =: Eq is not correct here because of NULLs.
		colVar := c.e.f.ConstructVariable(id)
		isExpr := c.e.f.ConstructIs(colVar, val)
		derived = append(derived, c.e.f.ConstructFiltersItem(isExpr))
	}
	return derived
}
52 changes: 49 additions & 3 deletions pkg/sql/opt/xform/join_funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,45 @@ func (c *CustomFuncs) GenerateMergeJoins(
// "sides" (in this example x,y on the left and z on the right) but there is
// no overlap.
//
// A lookup join can be created when the ON condition or implicit filters from
// CHECK constraints and computed columns constrain a prefix of the index
// columns to non-ranging constant values. To support this, the constant values
// are cross-joined with the input and used as key columns for the parent lookup
// join.
//
// For example, consider the tables and query below.
//
// CREATE TABLE abc (a INT PRIMARY KEY, b INT, c INT)
// CREATE TABLE xyz (
// x INT PRIMARY KEY,
// y INT,
// z INT NOT NULL,
// CHECK (z IN (1, 2, 3)),
// INDEX (z, y)
// )
// SELECT a, x FROM abc JOIN xyz ON a=y
//
// GenerateLookupJoins will perform the following transformation.
//
// Join LookupJoin(t@idx)
// / \ |
// / \ -> |
// Input Scan(t) Join
// / \
// / \
// Input Values(1, 2, 3)
//
// If a column is constrained to a single constant value, inlining normalization
// rules will reduce the cross join into a project.
//
// Join LookupJoin(t@idx)
// / \ |
// / \ -> |
// Input Scan(t) Project
// |
// |
// Input
//
func (c *CustomFuncs) GenerateLookupJoins(
grp memo.RelExpr,
joinType opt.Operator,
Expand All @@ -182,6 +221,12 @@ func (c *CustomFuncs) GenerateLookupJoins(
return
}

// Generate implicit filters from CHECK constraints and computed columns as
// optional filters to help generate lookup join keys.
optionalFilters := c.checkConstraintFilters(scanPrivate.Table)
computedColFilters := c.computedColFilters(scanPrivate.Table, on, optionalFilters)
optionalFilters = append(optionalFilters, computedColFilters...)

var pkCols opt.ColList
var iter scanIndexIter
iter.Init(c.e.mem, &c.im, scanPrivate, on, rejectInvertedIndexes)
Expand All @@ -191,14 +236,15 @@ func (c *CustomFuncs) GenerateLookupJoins(
numIndexKeyCols := index.LaxKeyColumnCount()

var constFilters memo.FiltersExpr
allFilters := append(onFilters, optionalFilters...)

// Check if the first column in the index has an equality constraint, or if
// it is constrained to a constant value. This check doesn't guarantee that
// we will find lookup join key columns, but it avoids the unnecessary work
// in most cases.
firstIdxCol := scanPrivate.Table.IndexColumnID(index, 0)
if _, ok := rightEq.Find(firstIdxCol); !ok {
if _, _, ok := c.findJoinFilterConstants(onFilters, firstIdxCol); !ok {
if _, _, ok := c.findJoinFilterConstants(allFilters, firstIdxCol); !ok {
return
}
}
Expand Down Expand Up @@ -226,7 +272,7 @@ func (c *CustomFuncs) GenerateLookupJoins(
// constant values. We cannot use a NULL value because the lookup
// join implements logic equivalent to simple equality between
// columns (where NULL never equals anything).
foundVals, onIdx, ok := c.findJoinFilterConstants(onFilters, idxCol)
foundVals, allIdx, ok := c.findJoinFilterConstants(allFilters, idxCol)
if !ok {
break
}
Expand Down Expand Up @@ -261,7 +307,7 @@ func (c *CustomFuncs) GenerateLookupJoins(

lookupJoin.KeyCols = append(lookupJoin.KeyCols, constColID)
rightSideCols = append(rightSideCols, idxCol)
constFilters = append(constFilters, onFilters[onIdx])
constFilters = append(constFilters, allFilters[allIdx])
}

if len(lookupJoin.KeyCols) == 0 {
Expand Down
74 changes: 0 additions & 74 deletions pkg/sql/opt/xform/select_funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -372,80 +372,6 @@ func (c *CustomFuncs) GenerateConstrainedScans(
})
}

// computedColFilters generates all filters that can be derived from the list of
// computed column expressions from the given table. A computed column can be
// used as a filter when it has a constant value. That is true when:
//
// 1. All other columns it references are constant, because other filters in
// the query constrain them to be so.
// 2. All functions in the computed column expression can be folded into
// constants (i.e. they do not have problematic side effects).
//
// Note that computed columns can depend on other computed columns; in general
// the dependencies form an acyclic directed graph. computedColFilters will
// return filters for all constant computed columns, regardless of the order of
// their dependencies.
//
// As with checkConstraintFilters, computedColFilters do not really filter any
// rows, they are rather facts or guarantees about the data. Treating them as
// filters may allow some indexes to be constrained and used. Consider the
// following example:
//
// CREATE TABLE t (
// k INT NOT NULL,
// hash INT AS (k % 4) STORED,
// PRIMARY KEY (hash, k)
// )
//
// SELECT * FROM t WHERE k = 5
//
// Notice that the filter provided explicitly wouldn't allow the optimizer to
// seek using the primary index (it would have to fall back to a table scan).
// However, column "hash" can be proven to have the constant value of 1, since
// it's dependent on column "k", which has the constant value of 5. This enables
// usage of the primary index:
//
// scan t
// ├── columns: k:1(int!null) hash:2(int!null)
// ├── constraint: /2/1: [/1/5 - /1/5]
// ├── key: (2)
// └── fd: ()-->(1)
//
// The values of both columns in that index are known, enabling a single value
// constraint to be generated.
func (c *CustomFuncs) computedColFilters(
	tabID opt.TableID, requiredFilters, optionalFilters memo.FiltersExpr,
) memo.FiltersExpr {
	md := c.e.mem.Metadata().TableMeta(tabID)
	if len(md.ComputedCols) == 0 {
		// Without computed columns there is nothing to derive.
		return nil
	}

	// Seed the set of known-constant columns from both filter lists; the
	// required filters are scanned before the optional ones.
	known := make(map[opt.ColumnID]opt.ScalarExpr)
	c.findConstantFilterCols(known, tabID, requiredFilters)
	c.findConstantFilterCols(known, tabID, optionalFilters)
	if len(known) == 0 {
		// The filters pin no column to a constant, so assume that no computed
		// column can be constant either.
		return nil
	}

	// Emit one filter per computed column that folds to a constant, i.e. whose
	// variables are all present in the known set.
	var derived memo.FiltersExpr
	for id := range md.ComputedCols {
		if !c.tryFoldComputedCol(md, id, known) {
			continue
		}
		// After a successful fold the column's constant value is read back
		// from the known set.
		val := known[id]
		// Use IS rather than =: Eq is not correct here because of NULLs.
		colVar := c.e.f.ConstructVariable(id)
		isExpr := c.e.f.ConstructIs(colVar, val)
		derived = append(derived, c.e.f.ConstructFiltersItem(isExpr))
	}
	return derived
}

// findConstantFilterCols adds to constFilterCols mappings from table column ID
// to the constant value of that column. It does this by iterating over the
// given lists of filters and finding expressions that constrain columns to a
Expand Down
12 changes: 6 additions & 6 deletions pkg/sql/opt/xform/testdata/external/tpce
Original file line number Diff line number Diff line change
Expand Up @@ -1550,7 +1550,7 @@ project
│ │ ├── key: (1)
│ │ ├── fd: (1)-->(3)
│ │ ├── project
│ │ │ ├── columns: "project_const_col_@6":10!null tx_id:1!null tx_rate:3!null
│ │ │ ├── columns: "lookup_join_const_col_@6":10!null tx_id:1!null tx_rate:3!null
│ │ │ ├── key: (1)
│ │ │ ├── fd: ()-->(10), (1)-->(3)
│ │ │ ├── scan taxrate
Expand All @@ -1560,7 +1560,7 @@ project
│ │ │ │ ├── key: (1)
│ │ │ │ └── fd: (1)-->(3)
│ │ │ └── projections
│ │ │ └── 0 [as="project_const_col_@6":10]
│ │ │ └── 0 [as="lookup_join_const_col_@6":10]
│ │ └── filters (true)
│ └── aggregations
│ └── sum [as=sum:8, outer=(3)]
Expand Down Expand Up @@ -2526,7 +2526,7 @@ project
│ │ ├── key: (1)
│ │ ├── fd: (1)-->(3)
│ │ ├── project
│ │ │ ├── columns: "project_const_col_@6":10!null tx_id:1!null tx_rate:3!null
│ │ │ ├── columns: "lookup_join_const_col_@6":10!null tx_id:1!null tx_rate:3!null
│ │ │ ├── key: (1)
│ │ │ ├── fd: ()-->(10), (1)-->(3)
│ │ │ ├── scan taxrate
Expand All @@ -2536,7 +2536,7 @@ project
│ │ │ │ ├── key: (1)
│ │ │ │ └── fd: (1)-->(3)
│ │ │ └── projections
│ │ │ └── 0 [as="project_const_col_@6":10]
│ │ │ └── 0 [as="lookup_join_const_col_@6":10]
│ │ └── filters (true)
│ └── aggregations
│ └── sum [as=sum:8, outer=(3)]
Expand Down Expand Up @@ -4156,7 +4156,7 @@ update watch_item
│ │ ├── key: (7)
│ │ ├── fd: ()-->(5,8), (7)-->(9), (4)==(7), (7)==(4)
│ │ ├── project
│ │ │ ├── columns: "project_const_col_@5":29!null wl_id:7!null wl_c_id:8!null watch_list.crdb_internal_mvcc_timestamp:9
│ │ │ ├── columns: "lookup_join_const_col_@5":29!null wl_id:7!null wl_c_id:8!null watch_list.crdb_internal_mvcc_timestamp:9
│ │ │ ├── key: (7)
│ │ │ ├── fd: ()-->(8,29), (7)-->(9)
│ │ │ ├── select
Expand All @@ -4170,7 +4170,7 @@ update watch_item
│ │ │ │ └── filters
│ │ │ │ └── wl_c_id:8 = 0 [outer=(8), constraints=(/8: [/0 - /0]; tight), fd=()-->(8)]
│ │ │ └── projections
│ │ │ └── 'SYMB' [as="project_const_col_@5":29]
│ │ │ └── 'SYMB' [as="lookup_join_const_col_@5":29]
│ │ └── filters (true)
│ └── projections
│ └── 'SYMB' [as=wi_s_symb_new:10]
Expand Down
4 changes: 2 additions & 2 deletions pkg/sql/opt/xform/testdata/external/tpce-no-stats
Original file line number Diff line number Diff line change
Expand Up @@ -1269,7 +1269,7 @@ limit
│ ├── fd: ()-->(7), (1,2)-->(3,4), (1)==(6), (6)==(1)
│ ├── limit hint: 20.00
│ ├── project
│ │ ├── columns: "project_const_col_@7":12!null hh_h_t_id:1!null hh_t_id:2!null hh_before_qty:3!null hh_after_qty:4!null
│ │ ├── columns: "lookup_join_const_col_@7":12!null hh_h_t_id:1!null hh_t_id:2!null hh_before_qty:3!null hh_after_qty:4!null
│ │ ├── key: (1,2)
│ │ ├── fd: ()-->(12), (1,2)-->(3,4)
│ │ ├── limit hint: 200.00
Expand All @@ -1279,7 +1279,7 @@ limit
│ │ │ ├── fd: (1,2)-->(3,4)
│ │ │ └── limit hint: 200.00
│ │ └── projections
│ │ └── 0 [as="project_const_col_@7":12]
│ │ └── 0 [as="lookup_join_const_col_@7":12]
│ └── filters (true)
└── 20

Expand Down
Loading

0 comments on commit 1a3fd72

Please sign in to comment.