Skip to content

Commit

Permalink
extractFD for group/datasource/selection/projection and ported some t…
Browse files Browse the repository at this point in the history
…est (pingcap#3)
  • Loading branch information
AilinKid authored Jan 5, 2022
1 parent 327491a commit 21dd9b3
Show file tree
Hide file tree
Showing 7 changed files with 916 additions and 10 deletions.
43 changes: 43 additions & 0 deletions planner/core/logical_plan_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ import (
"github.com/pingcap/tidb/parser/mysql"
"github.com/pingcap/tidb/parser/opcode"
"github.com/pingcap/tidb/parser/terror"
fd "github.com/pingcap/tidb/planner/functional_dependency"
"github.com/pingcap/tidb/planner/property"
"github.com/pingcap/tidb/planner/util"
"github.com/pingcap/tidb/privilege"
Expand Down Expand Up @@ -4244,6 +4245,48 @@ func (b *PlanBuilder) buildDataSource(ctx context.Context, tn *ast.TableName, as
return result, nil
}

// extractFD returns the functional dependencies (FDs) implied by the table's
// indexes. The result is built on the first call and cached in ds.fdSet, so
// later calls return the same *fd.FDSet.
//
// Only primary and unique indexes contribute:
//   - a primary index, or a unique index whose columns are all NOT NULL, adds a
//     strict FD from its key columns to every table column and marks the key
//     columns as not-null;
//   - a unique index with at least one nullable column adds a lax FD, because
//     rows holding NULL in the key may repeat.
//
// A non-unique index places no uniqueness constraint on the rows, so it
// contributes no FD at all.
func (ds *DataSource) extractFD() *fd.FDSet {
	// FD in datasource (leaf node) can be cached and reused.
	if ds.fdSet != nil {
		return ds.fdSet
	}

	fds := &fd.FDSet{}
	allCols := fd.NewFastIntSet()
	// should use the column's unique ID avoiding fdSet conflict.
	for _, col := range ds.TblCols {
		// todo: change it to int64
		allCols.Insert(int(col.UniqueID))
	}

	for _, idx := range ds.tableInfo.Indices {
		if !idx.Primary && !idx.Unique {
			// A plain index does not make its key determine anything.
			continue
		}
		keyCols := fd.NewFastIntSet()
		allColIsNotNull := true
		for _, idxCol := range idx.Columns {
			// Note: even the prefix column can also be the FD. For example:
			// unique(char_column(10)) guarantees the prefix to be unique,
			// which means the whole column is unique too.
			refCol := ds.tableInfo.Columns[idxCol.Offset]
			if !mysql.HasNotNullFlag(refCol.Flag) {
				allColIsNotNull = false
			}
			keyCols.Insert(int(ds.TblCols[idxCol.Offset].UniqueID))
		}
		if idx.Primary || allColIsNotNull {
			fds.AddStrictFunctionalDependency(keyCols, allCols)
			fds.MakeNotNull(keyCols)
		} else {
			fds.AddLaxFunctionalDependency(keyCols, allCols)
		}
	}

	ds.fdSet = fds
	return ds.fdSet
}

func (b *PlanBuilder) timeRangeForSummaryTable() QueryTimeRange {
const defaultSummaryDuration = 30 * time.Minute
hints := b.TableHints()
Expand Down
106 changes: 106 additions & 0 deletions planner/core/logical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"github.com/pingcap/tidb/parser/auth"
"github.com/pingcap/tidb/parser/model"
"github.com/pingcap/tidb/parser/mysql"
fd "github.com/pingcap/tidb/planner/functional_dependency"
"github.com/pingcap/tidb/planner/property"
"github.com/pingcap/tidb/planner/util"
"github.com/pingcap/tidb/sessionctx"
Expand Down Expand Up @@ -308,6 +309,31 @@ type LogicalProjection struct {
AvoidColumnEvaluator bool
}

// extractFD implements the logical plan interface, extracting the FD from bottom up.
//
// It takes the FDSet produced by the embedded logicalSchemaProducer and
// restricts it to the columns of this projection's output schema.
func (p *LogicalProjection) extractFD() *fd.FDSet {
	// basically extract the children's fdSet.
	fds := p.logicalSchemaProducer.extractFD()

	// collect the output columns' unique ID.
	outputColsUniqueIDs := fd.NewFastIntSet()
	for _, col := range p.Schema().Columns {
		outputColsUniqueIDs.Insert(int(col.UniqueID))
	}

	// TODO: enclose the project expr as a `special column`, assigning unique ID and writing it to FDSet.
	//   projection(1 as x, (b+1) as y, b) from t
	// once the upper layer use x, y to do more computation, we better maintain this FD in FDSet by:
	//   fds.AddConstants(fd.NewFastIntSet(x.uniqueID))
	//   fds.AddStrictFunctionalDependency(fd.NewFastIntSet(b.uniqueID), fd.NewFastIntSet((b+1).uniqueID))

	// apply operator's characteristic's FD setting.
	// 1: since the distinct attribute is built as firstRow agg func, we don't need to think about it here.
	fds.ProjectCols(outputColsUniqueIDs)
	return fds
}

// ExtractCorrelatedCols implements LogicalPlan interface.
func (p *LogicalProjection) ExtractCorrelatedCols() []*expression.CorrelatedColumn {
corCols := make([]*expression.CorrelatedColumn, 0, len(p.Exprs))
Expand Down Expand Up @@ -363,6 +389,41 @@ func (la *LogicalAggregation) HasOrderBy() bool {
return false
}

// extractFD implements the logical plan interface, extracting the FD from bottom up.
//
// Without GROUP BY items the aggregation is reported as producing at most one
// row. Otherwise the inherited FDs are restricted to the output columns and,
// when every GROUP BY item is a plain column, an FD from the group-by columns
// to the output columns is added: strict if all group-by columns are flagged
// NOT NULL in the output schema, lax otherwise.
func (la *LogicalAggregation) extractFD() *fd.FDSet {
	// basically extract the children's fdSet.
	fds := la.logicalSchemaProducer.extractFD()

	// collect the output columns' unique ID and which of them are NOT NULL.
	outputColsUniqueIDs := fd.NewFastIntSet()
	notnullColsUniqueIDs := fd.NewFastIntSet()
	for _, col := range la.Schema().Columns {
		outputColsUniqueIDs.Insert(int(col.UniqueID))
		if mysql.HasNotNullFlag(col.RetType.Flag) {
			notnullColsUniqueIDs.Insert(int(col.UniqueID))
		}
	}

	// apply operator's characteristic's FD setting.
	if len(la.GroupByItems) == 0 {
		fds.MaxOneRow(outputColsUniqueIDs)
		return fds
	}

	// eliminating input columns that are un-projected.
	fds.ProjectCols(outputColsUniqueIDs)

	// TODO: enclose the group expr as a `special column`, assigning unique ID and writing it to FDSet.
	groupByCols := la.GetGroupByCols()
	if len(groupByCols) != len(la.GroupByItems) {
		// At least one GROUP BY item is not a plain column, so the plain
		// columns alone are only part of the grouping key (possibly none of
		// it) and must not be recorded as determining the output.
		// NOTE(review): assumes GetGroupByCols returns exactly the items of
		// GroupByItems that are plain columns — confirm against its definition.
		return fds
	}
	groupByColsUniqueIDs := fd.NewFastIntSet()
	for _, col := range groupByCols {
		groupByColsUniqueIDs.Insert(int(col.UniqueID))
	}

	if groupByColsUniqueIDs.SubsetOf(notnullColsUniqueIDs) {
		fds.AddStrictFunctionalDependency(groupByColsUniqueIDs, outputColsUniqueIDs)
	} else {
		fds.AddLaxFunctionalDependency(groupByColsUniqueIDs, outputColsUniqueIDs)
	}
	return fds
}

// CopyAggHints copies the aggHints from another LogicalAggregation.
func (la *LogicalAggregation) CopyAggHints(agg *LogicalAggregation) {
// TODO: Copy the hint may make the un-applicable hint throw the
Expand Down Expand Up @@ -457,6 +518,51 @@ type LogicalSelection struct {
buildByHaving bool
}

// extractFD implements the logical plan interface, extracting the FD from bottom up.
//
// On top of the FDs inherited through baseLogicalPlan it marks as not-null
// every output column flagged NOT NULL and every column that one of the
// selection conditions rejects NULL for, then restricts the FDSet to the
// output columns.
func (p *LogicalSelection) extractFD() *fd.FDSet {
	// basically extract the children's fdSet.
	fds := p.baseLogicalPlan.extractFD()

	// collect the output columns' unique ID.
	outputColsUniqueIDs := fd.NewFastIntSet()
	notnullColsUniqueIDs := fd.NewFastIntSet()
	for _, col := range p.Schema().Columns {
		outputColsUniqueIDs.Insert(int(col.UniqueID))
		if mysql.HasNotNullFlag(col.RetType.Flag) {
			notnullColsUniqueIDs.Insert(int(col.UniqueID))
		}
	}

	// extract the column NOT NULL rejection characteristic from selection condition.
	// CNF considered only: every entry of p.Conditions has to hold for a row to pass.
	//
	// Take this case: select * from t where (a = 1) and (b is null):
	//
	// For the where clause to evaluate to true, both {a = 1} and {b is null} need to be true.
	// Hence:
	//
	// 1: `a` mustn't be null since `NULL = 1` is evaluated as NULL.
	// 2: `b` can be null since `NULL is NULL` is evaluated as true.
	//
	// As a result, only `a` is added to the not-null set.
	//
	// Each column is probed on its own. A condition such as `a = 1 OR b = 1`
	// is rejected when a and b are both NULL, yet a row with a NULL `a` and
	// b = 1 still passes, so neither column may be marked not-null.
	for _, condition := range p.Conditions {
		var cols []*expression.Column
		cols = expression.ExtractColumnsFromExpressions(cols, []expression.Expression{condition}, nil)
		for _, col := range cols {
			// NOTE(review): assumes isNullRejected treats exactly the columns of
			// the schema it is given as NULL — confirm against its definition.
			if isNullRejected(p.ctx, expression.NewSchema(col), condition) {
				notnullColsUniqueIDs.Insert(int(col.UniqueID))
			}
		}
	}
	// TODO: extract constant cols and equivalence cols.

	// apply operator's characteristic's FD setting.
	fds.MakeNotNull(notnullColsUniqueIDs)
	// fds.AddConstants()
	// fds.AddEquivalence()

	fds.ProjectCols(outputColsUniqueIDs)
	return fds
}

// ExtractCorrelatedCols implements LogicalPlan interface.
func (p *LogicalSelection) ExtractCorrelatedCols() []*expression.CorrelatedColumn {
corCols := make([]*expression.CorrelatedColumn, 0, len(p.Conditions))
Expand Down
12 changes: 10 additions & 2 deletions planner/core/plan.go
Original file line number Diff line number Diff line change
Expand Up @@ -377,12 +377,20 @@ type baseLogicalPlan struct {
self LogicalPlan
maxOneRow bool
children []LogicalPlan
fdSet *fd.FDSet
// fdSet is a set of functional dependencies(FDs) which powers many optimizations,
// including eliminating unnecessary DISTINCT operators, simplifying ORDER BY columns,
// removing Max1Row operators, and mapping semi-joins to inner-joins.
// for now, it's hard to maintain in individual operator, build it from bottom up when using.
fdSet *fd.FDSet
}

// extractFD is the default for logical plans that neither add nor remove FDs
// themselves: it returns a newly allocated FDSet into which the FDSet of every
// child has been merged with AddFrom. A plan without children yields an empty
// FDSet.
func (p *baseLogicalPlan) extractFD() *fd.FDSet {
	fds := &fd.FDSet{}
	for _, ch := range p.children {
		fds.AddFrom(ch.extractFD())
	}
	return fds
}

func (p *baseLogicalPlan) MaxOneRow() bool {
Expand Down
12 changes: 12 additions & 0 deletions planner/functional_dependency/doc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
// Package functional_dependency holds the functional-dependency (FD) types,
// such as FDSet and FastIntSet, that the planner uses to describe which
// columns determine which other columns.
package functional_dependency

// Theory to Practice
//
// For a more rigorous examination of functional dependencies and their
// interaction with various SQL operators, see the following Master's Thesis:
//
// Paulley, Glenn Norman. (2000).
// Exploiting Functional Dependence in Query Optimization.
// https://cs.uwaterloo.ca/research/tr/2000/11/CS-2000-11.thesis.pdf

// TODO: Add the RFC design.
Loading

0 comments on commit 21dd9b3

Please sign in to comment.