From 129d9bc99dd8a7da40e2b51cdf45a44eca0e53e0 Mon Sep 17 00:00:00 2001
From: Yuanjia Zhang
Date: Wed, 15 Jun 2022 16:38:34 +0800
Subject: [PATCH] planner: introduce new cost formula for Selection/TableScan/IndexScan (#35378)

ref pingcap/tidb#35240
---
 planner/core/plan_cost.go | 100 ++++++++++++++++++++++++++++++--------
 1 file changed, 80 insertions(+), 20 deletions(-)

diff --git a/planner/core/plan_cost.go b/planner/core/plan_cost.go
index bfa04ad7a5803..3ec446369631a 100644
--- a/planner/core/plan_cost.go
+++ b/planner/core/plan_cost.go
@@ -34,6 +34,11 @@ const (
 	CostFlagUseTrueCardinality
 )
 
+const (
+	modelVer1 = 1
+	modelVer2 = 2
+)
+
 func hasCostFlag(costFlag, flag uint64) bool {
 	return (costFlag & flag) > 0
 }
@@ -61,21 +66,40 @@ func (p *PhysicalSelection) GetPlanCost(taskType property.TaskType, costFlag uin
 	if p.planCostInit && !hasCostFlag(costFlag, CostFlagRecalculate) {
 		return p.planCost, nil
 	}
-	var cpuFactor float64
-	switch taskType {
-	case property.RootTaskType, property.MppTaskType:
-		cpuFactor = p.ctx.GetSessionVars().GetCPUFactor()
-	case property.CopSingleReadTaskType, property.CopDoubleReadTaskType:
-		cpuFactor = p.ctx.GetSessionVars().GetCopCPUFactor()
-	default:
-		return 0, errors.Errorf("unknown task type %v", taskType)
+
+	var selfCost float64
+	switch p.ctx.GetSessionVars().CostModelVersion {
+	case modelVer1: // selection cost: rows * cpu-factor
+		var cpuFactor float64
+		switch taskType {
+		case property.RootTaskType, property.MppTaskType:
+			cpuFactor = p.ctx.GetSessionVars().GetCPUFactor()
+		case property.CopSingleReadTaskType, property.CopDoubleReadTaskType:
+			cpuFactor = p.ctx.GetSessionVars().GetCopCPUFactor()
+		default:
+			return 0, errors.Errorf("unknown task type %v", taskType)
+		}
+		selfCost = getCardinality(p.children[0], costFlag) * cpuFactor
+	case modelVer2: // selection cost: rows * num-filters * cpu-factor
+		var cpuFactor float64
+		switch taskType {
+		case property.RootTaskType:
+			cpuFactor = p.ctx.GetSessionVars().GetCPUFactor()
+		case property.MppTaskType: // use a dedicated cpu-factor for TiFlash
+			cpuFactor = p.ctx.GetSessionVars().GetTiFlashCPUFactor()
+		case property.CopSingleReadTaskType, property.CopDoubleReadTaskType:
+			cpuFactor = p.ctx.GetSessionVars().GetCopCPUFactor()
+		default:
+			return 0, errors.Errorf("unknown task type %v", taskType)
+		}
+		selfCost = getCardinality(p.children[0], costFlag) * float64(len(p.Conditions)) * cpuFactor
 	}
+
 	childCost, err := p.children[0].GetPlanCost(taskType, costFlag)
 	if err != nil {
 		return 0, err
 	}
-	p.planCost = childCost
-	p.planCost += getCardinality(p.children[0], costFlag) * cpuFactor // selection cost: rows * cpu-factor
+	p.planCost = childCost + selfCost
 	p.planCostInit = true
 	return p.planCost, nil
 }
@@ -343,12 +367,34 @@ func (p *PhysicalTableScan) GetPlanCost(taskType property.TaskType, costFlag uin
 	if p.planCostInit && !hasCostFlag(costFlag, CostFlagRecalculate) {
 		return p.planCost, nil
 	}
-	// scan cost: rows * row-size * scan-factor
-	scanFactor := p.ctx.GetSessionVars().GetScanFactor(p.Table)
-	if p.Desc {
-		scanFactor = p.ctx.GetSessionVars().GetDescScanFactor(p.Table)
+
+	var selfCost float64
+	switch p.ctx.GetSessionVars().CostModelVersion {
+	case modelVer1: // scan cost: rows * row-size * scan-factor
+		scanFactor := p.ctx.GetSessionVars().GetScanFactor(p.Table)
+		if p.Desc {
+			scanFactor = p.ctx.GetSessionVars().GetDescScanFactor(p.Table)
+		}
+		selfCost = getCardinality(p, costFlag) * p.getScanRowSize() * scanFactor
+	case modelVer2: // scan cost: rows * log2(row-size) * scan-factor
+		var scanFactor float64
+		switch taskType {
+		case property.MppTaskType: // use a dedicated scan-factor for TiFlash
+			// no need to distinguish `Scan` and `DescScan` for TiFlash for now
+			scanFactor = p.ctx.GetSessionVars().GetTiFlashScanFactor()
+		default: // for TiKV
+			scanFactor = p.ctx.GetSessionVars().GetScanFactor(p.Table)
+			if p.Desc {
+				scanFactor = p.ctx.GetSessionVars().GetDescScanFactor(p.Table)
+			}
+		}
+		// the formula `log(rowSize)` is based on experiment results
+		rowSize := math.Max(p.getScanRowSize(), 2.0) // to guarantee logRowSize >= 1
+		logRowSize := math.Log2(rowSize)
+		selfCost = getCardinality(p, costFlag) * logRowSize * scanFactor
 	}
-	p.planCost = getCardinality(p, costFlag) * p.getScanRowSize() * scanFactor
+
+	p.planCost = selfCost
 	p.planCostInit = true
 	return p.planCost, nil
 }
@@ -358,12 +404,26 @@ func (p *PhysicalIndexScan) GetPlanCost(taskType property.TaskType, costFlag uin
 	if p.planCostInit && !hasCostFlag(costFlag, CostFlagRecalculate) {
 		return p.planCost, nil
 	}
-	// scan cost: rows * row-size * scan-factor
-	scanFactor := p.ctx.GetSessionVars().GetScanFactor(p.Table)
-	if p.Desc {
-		scanFactor = p.ctx.GetSessionVars().GetDescScanFactor(p.Table)
+
+	var selfCost float64
+	switch p.ctx.GetSessionVars().CostModelVersion {
+	case modelVer1: // scan cost: rows * row-size * scan-factor
+		scanFactor := p.ctx.GetSessionVars().GetScanFactor(p.Table)
+		if p.Desc {
+			scanFactor = p.ctx.GetSessionVars().GetDescScanFactor(p.Table)
+		}
+		selfCost = getCardinality(p, costFlag) * p.getScanRowSize() * scanFactor
+	case modelVer2:
+		scanFactor := p.ctx.GetSessionVars().GetScanFactor(p.Table)
+		if p.Desc {
+			scanFactor = p.ctx.GetSessionVars().GetDescScanFactor(p.Table)
+		}
+		rowSize := math.Max(p.getScanRowSize(), 2.0)
+		logRowSize := math.Log2(rowSize)
+		selfCost = getCardinality(p, costFlag) * logRowSize * scanFactor
 	}
-	p.planCost = getCardinality(p, costFlag) * p.getScanRowSize() * scanFactor
+
+	p.planCost = selfCost
 	p.planCostInit = true
 	return p.planCost, nil
 }