Skip to content

Commit

Permalink
planner: fix wrong selectivity for inner selection in index join (#10633) (#10856)
Browse files Browse the repository at this point in the history
  • Loading branch information
eurekaka authored and winkyao committed Jun 20, 2019
1 parent 7b8e174 commit 1b320a1
Show file tree
Hide file tree
Showing 9 changed files with 92 additions and 79 deletions.
4 changes: 2 additions & 2 deletions cmd/explaintest/r/explain_complex.result
Original file line number Diff line number Diff line change
Expand Up @@ -156,9 +156,9 @@ Projection_9 0.00 root test.dt.id, test.dt.aid, test.dt.pt, test.dt.dic, test.dt
├─TableReader_38 0.00 root data:Selection_37
│ └─Selection_37 0.00 cop eq(test.dt.bm, 0), eq(test.dt.pt, "ios"), gt(test.dt.t, 1478185592)
│ └─TableScan_36 10000.00 cop table:dt, range:[0,+inf], keep order:false, stats:pseudo
└─IndexLookUp_17 3.33 root
└─IndexLookUp_17 0.00 root
├─IndexScan_14 10.00 cop table:rr, index:aid, dic, range: decided by [test.dt.aid test.dt.dic], keep order:false, stats:pseudo
└─Selection_16 3.33 cop eq(test.rr.pt, "ios"), gt(test.rr.t, 1478185592)
└─Selection_16 0.00 cop eq(test.rr.pt, "ios"), gt(test.rr.t, 1478185592)
└─TableScan_15 10.00 cop table:rr, keep order:false, stats:pseudo
explain select pc,cr,count(DISTINCT uid) as pay_users,count(oid) as pay_times,sum(am) as am from pp where ps=2 and ppt>=1478188800 and ppt<1478275200 and pi in ('510017','520017') and uid in ('18089709','18090780') group by pc,cr;
id count task operator info
Expand Down
12 changes: 6 additions & 6 deletions cmd/explaintest/r/explain_complex_stats.result
Original file line number Diff line number Diff line change
Expand Up @@ -132,9 +132,9 @@ Projection_12 424.00 root test.gad.id, test.dd.id, test.gad.aid, test.gad.cm, te
├─TableReader_28 424.00 root data:Selection_27
│ └─Selection_27 424.00 cop eq(test.gad.bm, 0), eq(test.gad.pt, "android"), gt(test.gad.t, 1478143908)
│ └─TableScan_26 1999.00 cop table:gad, range:[0,+inf], keep order:false
└─IndexLookUp_22 455.80 root
└─IndexLookUp_22 0.23 root
├─IndexScan_19 1.00 cop table:dd, index:aid, dic, range: decided by [test.gad.aid test.gad.ip], keep order:false
└─Selection_21 455.80 cop eq(test.dd.bm, 0), eq(test.dd.pt, "android"), gt(test.dd.t, 1478143908)
└─Selection_21 0.23 cop eq(test.dd.bm, 0), eq(test.dd.pt, "android"), gt(test.dd.t, 1478143908)
└─TableScan_20 1.00 cop table:dd, keep order:false
explain select gad.id as gid,sdk.id as sid,gad.aid as aid,gad.cm as cm,sdk.dic as dic,sdk.ip as ip, sdk.t as t, gad.p1 as p1, gad.p2 as p2, gad.p3 as p3, gad.p4 as p4, gad.p5 as p5, gad.p6_md5 as p6, gad.p7_md5 as p7, gad.ext as ext from st gad join dd sdk on gad.aid = sdk.aid and gad.dic = sdk.mac and gad.t < sdk.t where gad.t > 1477971479 and gad.bm = 0 and gad.pt = 'ios' and gad.dit = 'mac' and sdk.t > 1477971479 and sdk.bm = 0 and sdk.pt = 'ios' limit 3000;
id count task operator info
Expand All @@ -144,9 +144,9 @@ Projection_9 170.34 root test.gad.id, test.sdk.id, test.gad.aid, test.gad.cm, te
├─TableReader_22 170.34 root data:Selection_21
│ └─Selection_21 170.34 cop eq(test.gad.bm, 0), eq(test.gad.dit, "mac"), eq(test.gad.pt, "ios"), gt(test.gad.t, 1477971479)
│ └─TableScan_20 1999.00 cop table:gad, range:[0,+inf], keep order:false
└─IndexLookUp_16 509.04 root
└─IndexLookUp_16 0.25 root
├─IndexScan_13 1.00 cop table:sdk, index:aid, dic, range: decided by [test.gad.aid test.gad.dic], keep order:false
└─Selection_15 509.04 cop eq(test.sdk.bm, 0), eq(test.sdk.pt, "ios"), gt(test.sdk.t, 1477971479)
└─Selection_15 0.25 cop eq(test.sdk.bm, 0), eq(test.sdk.pt, "ios"), gt(test.sdk.t, 1477971479)
└─TableScan_14 1.00 cop table:dd, keep order:false
explain SELECT cm, p1, p2, p3, p4, p5, p6_md5, p7_md5, count(1) as click_pv, count(DISTINCT ip) as click_ip FROM st WHERE (t between 1478188800 and 1478275200) and aid='cn.sbkcq' and pt='android' GROUP BY cm, p1, p2, p3, p4, p5, p6_md5, p7_md5;
id count task operator info
Expand All @@ -164,9 +164,9 @@ Projection_9 428.32 root test.dt.id, test.dt.aid, test.dt.pt, test.dt.dic, test.
├─TableReader_38 428.32 root data:Selection_37
│ └─Selection_37 428.32 cop eq(test.dt.bm, 0), eq(test.dt.pt, "ios"), gt(test.dt.t, 1478185592)
│ └─TableScan_36 2000.00 cop table:dt, range:[0,+inf], keep order:false
└─IndexLookUp_17 970.00 root
└─IndexLookUp_17 0.48 root
├─IndexScan_14 1.00 cop table:rr, index:aid, dic, range: decided by [test.dt.aid test.dt.dic], keep order:false
└─Selection_16 970.00 cop eq(test.rr.pt, "ios"), gt(test.rr.t, 1478185592)
└─Selection_16 0.48 cop eq(test.rr.pt, "ios"), gt(test.rr.t, 1478185592)
└─TableScan_15 1.00 cop table:rr, keep order:false
explain select pc,cr,count(DISTINCT uid) as pay_users,count(oid) as pay_times,sum(am) as am from pp where ps=2 and ppt>=1478188800 and ppt<1478275200 and pi in ('510017','520017') and uid in ('18089709','18090780') group by pc,cr;
id count task operator info
Expand Down
42 changes: 21 additions & 21 deletions cmd/explaintest/r/explain_easy.result
Original file line number Diff line number Diff line change
Expand Up @@ -316,41 +316,41 @@ Projection_11 5.00 root 9_aux_0
├─TableReader_15 5.00 root data:TableScan_14
│ └─TableScan_14 5.00 cop table:t, range:[-inf,+inf], keep order:false
└─StreamAgg_20 1.00 root funcs:count(1)
└─IndexJoin_49 2.40 root inner join, inner:TableReader_48, outer key:test.s.a, inner key:test.t1.a
├─IndexReader_41 2.40 root index:Selection_40
│ └─Selection_40 2.40 cop eq(3, test.t.a)
│ └─IndexScan_39 3.00 cop table:s, index:b, range:[3,3], keep order:false
└─TableReader_48 0.80 root data:Selection_47
└─Selection_47 0.80 cop eq(3, test.t.a)
└─TableScan_46 1.00 cop table:t1, range: decided by [test.s.a], keep order:false
└─IndexJoin_48 2.40 root inner join, inner:TableReader_47, outer key:test.s.a, inner key:test.t1.a
├─IndexReader_40 2.40 root index:Selection_39
│ └─Selection_39 2.40 cop eq(3, test.t.a)
│ └─IndexScan_38 3.00 cop table:s, index:b, range:[3,3], keep order:false
└─TableReader_47 0.80 root data:Selection_46
└─Selection_46 0.80 cop eq(3, test.t.a)
└─TableScan_45 1.00 cop table:t1, range: decided by [test.s.a], keep order:false
explain select t.c in (select count(*) from t s left join t t1 on s.a = t1.a where 3 = t.a and s.b = 3) from t;
id count task operator info
Projection_10 5.00 root 9_aux_0
└─Apply_12 5.00 root left outer semi join, inner:StreamAgg_19, other cond:eq(test.t.c, count(*))
├─TableReader_14 5.00 root data:TableScan_13
│ └─TableScan_13 5.00 cop table:t, range:[-inf,+inf], keep order:false
└─StreamAgg_19 1.00 root funcs:count(1)
└─IndexJoin_43 2.40 root left outer join, inner:TableReader_42, outer key:test.s.a, inner key:test.t1.a
├─IndexReader_35 2.40 root index:Selection_34
│ └─Selection_34 2.40 cop eq(3, test.t.a)
│ └─IndexScan_33 3.00 cop table:s, index:b, range:[3,3], keep order:false
└─TableReader_42 0.80 root data:Selection_41
└─Selection_41 0.80 cop eq(3, test.t.a)
└─TableScan_40 1.00 cop table:t1, range: decided by [test.s.a], keep order:false
└─IndexJoin_42 2.40 root left outer join, inner:TableReader_41, outer key:test.s.a, inner key:test.t1.a
├─IndexReader_34 2.40 root index:Selection_33
│ └─Selection_33 2.40 cop eq(3, test.t.a)
│ └─IndexScan_32 3.00 cop table:s, index:b, range:[3,3], keep order:false
└─TableReader_41 0.80 root data:Selection_40
└─Selection_40 0.80 cop eq(3, test.t.a)
└─TableScan_39 1.00 cop table:t1, range: decided by [test.s.a], keep order:false
explain select t.c in (select count(*) from t s right join t t1 on s.a = t1.a where 3 = t.a and t1.b = 3) from t;
id count task operator info
Projection_10 5.00 root 9_aux_0
└─Apply_12 5.00 root left outer semi join, inner:StreamAgg_19, other cond:eq(test.t.c, count(*))
├─TableReader_14 5.00 root data:TableScan_13
│ └─TableScan_13 5.00 cop table:t, range:[-inf,+inf], keep order:false
└─StreamAgg_19 1.00 root funcs:count(1)
└─IndexJoin_43 2.40 root right outer join, inner:TableReader_42, outer key:test.t1.a, inner key:test.s.a
├─TableReader_42 0.80 root data:Selection_41
│ └─Selection_41 0.80 cop eq(3, test.t.a)
│ └─TableScan_40 1.00 cop table:s, range: decided by [test.t1.a], keep order:false
└─IndexReader_35 2.40 root index:Selection_34
└─Selection_34 2.40 cop eq(3, test.t.a)
└─IndexScan_33 3.00 cop table:t1, index:b, range:[3,3], keep order:false
└─IndexJoin_42 2.40 root right outer join, inner:TableReader_41, outer key:test.t1.a, inner key:test.s.a
├─TableReader_41 0.80 root data:Selection_40
│ └─Selection_40 0.80 cop eq(3, test.t.a)
│ └─TableScan_39 1.00 cop table:s, range: decided by [test.t1.a], keep order:false
└─IndexReader_34 2.40 root index:Selection_33
└─Selection_33 2.40 cop eq(3, test.t.a)
└─IndexScan_32 3.00 cop table:t1, index:b, range:[3,3], keep order:false
drop table if exists t;
create table t(a int unsigned);
explain select t.a = '123455' from t;
Expand Down
22 changes: 11 additions & 11 deletions cmd/explaintest/r/topn_push_down.result
Original file line number Diff line number Diff line change
Expand Up @@ -169,20 +169,20 @@ LIMIT 0, 5;
id count task operator info
Projection_13 0.00 root test.te.expected_time
└─Limit_19 0.00 root offset:0, count:5
└─IndexJoin_101 0.00 root left outer join, inner:IndexReader_100, outer key:test.tr.id, inner key:test.p.relate_id
├─TopN_104 0.00 root test.te.expect_time:asc, offset:0, count:5
└─IndexJoin_91 0.00 root left outer join, inner:IndexReader_90, outer key:test.tr.id, inner key:test.p.relate_id
├─TopN_94 0.00 root test.te.expect_time:asc, offset:0, count:5
│ └─IndexJoin_35 0.00 root inner join, inner:IndexLookUp_34, outer key:test.tr.id, inner key:test.te.trade_id
│ ├─IndexLookUp_83 0.00 root
│ │ ├─Selection_81 0.00 cop eq(test.tr.business_type, 18), in(test.tr.trade_type, 1)
│ │ │ └─IndexScan_79 10.00 cop table:tr, index:shop_identy, trade_status, business_type, trade_pay_status, trade_type, delivery_type, source, biz_date, range:[810094178,810094178], keep order:false, stats:pseudo
│ │ └─Selection_82 0.00 cop eq(test.tr.brand_identy, 32314), eq(test.tr.domain_type, 2)
│ │ └─TableScan_80 0.00 cop table:tr, keep order:false, stats:pseudo
│ └─IndexLookUp_34 250.00 root
│ ├─IndexLookUp_73 0.00 root
│ │ ├─Selection_71 0.00 cop eq(test.tr.business_type, 18), in(test.tr.trade_type, 1)
│ │ │ └─IndexScan_69 10.00 cop table:tr, index:shop_identy, trade_status, business_type, trade_pay_status, trade_type, delivery_type, source, biz_date, range:[810094178,810094178], keep order:false, stats:pseudo
│ │ └─Selection_72 0.00 cop eq(test.tr.brand_identy, 32314), eq(test.tr.domain_type, 2)
│ │ └─TableScan_70 0.00 cop table:tr, keep order:false, stats:pseudo
│ └─IndexLookUp_34 0.25 root
│ ├─IndexScan_31 10.00 cop table:te, index:trade_id, range: decided by [test.tr.id], keep order:false, stats:pseudo
│ └─Selection_33 250.00 cop ge(test.te.expect_time, 2018-04-23 00:00:00.000000), le(test.te.expect_time, 2018-04-23 23:59:59.000000)
│ └─Selection_33 0.25 cop ge(test.te.expect_time, 2018-04-23 00:00:00.000000), le(test.te.expect_time, 2018-04-23 23:59:59.000000)
│ └─TableScan_32 10.00 cop table:te, keep order:false, stats:pseudo
└─IndexReader_100 10.00 root index:IndexScan_99
└─IndexScan_99 10.00 cop table:p, index:relate_id, range: decided by [test.tr.id], keep order:false, stats:pseudo
└─IndexReader_90 10.00 root index:IndexScan_89
└─IndexScan_89 10.00 cop table:p, index:relate_id, range: decided by [test.tr.id], keep order:false, stats:pseudo
desc select 1 as a from dual order by a limit 1;
id count task operator info
Projection_6 1.00 root 1
Expand Down
20 changes: 10 additions & 10 deletions cmd/explaintest/r/tpch.result
Original file line number Diff line number Diff line change
Expand Up @@ -261,9 +261,9 @@ Projection_14 10.00 root tpch.lineitem.l_orderkey, 7_col_0, tpch.orders.o_orderd
│ └─TableReader_49 36870000.00 root data:Selection_48
│ └─Selection_48 36870000.00 cop lt(tpch.orders.o_orderdate, 1995-03-13 00:00:00.000000)
│ └─TableScan_47 75000000.00 cop table:orders, range:[-inf,+inf], keep order:false
└─IndexLookUp_25 162945114.27 root
└─IndexLookUp_25 0.54 root
├─IndexScan_22 1.00 cop table:lineitem, index:L_ORDERKEY, L_LINENUMBER, range: decided by [tpch.orders.o_orderkey], keep order:false
└─Selection_24 162945114.27 cop gt(tpch.lineitem.l_shipdate, 1995-03-13 00:00:00.000000)
└─Selection_24 0.54 cop gt(tpch.lineitem.l_shipdate, 1995-03-13 00:00:00.000000)
└─TableScan_23 1.00 cop table:lineitem, keep order:false
/*
Q4 Order Priority Checking Query
Expand Down Expand Up @@ -302,9 +302,9 @@ Sort_10 1.00 root tpch.orders.o_orderpriority:asc
├─TableReader_33 2925937.50 root data:Selection_32
│ └─Selection_32 2925937.50 cop ge(tpch.orders.o_orderdate, 1995-01-01 00:00:00.000000), lt(tpch.orders.o_orderdate, 1995-04-01)
│ └─TableScan_31 75000000.00 cop table:orders, range:[-inf,+inf], keep order:false
└─IndexLookUp_20 240004648.80 root
└─IndexLookUp_20 0.80 root
├─IndexScan_17 1.00 cop table:lineitem, index:L_ORDERKEY, L_LINENUMBER, range: decided by [tpch.orders.o_orderkey], keep order:false
└─Selection_19 240004648.80 cop lt(tpch.lineitem.l_commitdate, tpch.lineitem.l_receiptdate)
└─Selection_19 0.80 cop lt(tpch.lineitem.l_commitdate, tpch.lineitem.l_receiptdate)
└─TableScan_18 1.00 cop table:lineitem, keep order:false
/*
Q5 Local Supplier Volume Query
Expand Down Expand Up @@ -670,9 +670,9 @@ Projection_17 20.00 root tpch.customer.c_custkey, tpch.customer.c_name, 9_col_0,
│ │ ├─TableReader_59 3017307.69 root data:Selection_58
│ │ │ └─Selection_58 3017307.69 cop ge(tpch.orders.o_orderdate, 1993-08-01 00:00:00.000000), lt(tpch.orders.o_orderdate, 1993-11-01)
│ │ │ └─TableScan_57 75000000.00 cop table:orders, range:[-inf,+inf], keep order:false
│ │ └─IndexLookUp_41 73916005.00 root
│ │ └─IndexLookUp_41 0.25 root
│ │ ├─IndexScan_38 1.00 cop table:lineitem, index:L_ORDERKEY, L_LINENUMBER, range: decided by [tpch.orders.o_orderkey], keep order:false
│ │ └─Selection_40 73916005.00 cop eq(tpch.lineitem.l_returnflag, "R")
│ │ └─Selection_40 0.25 cop eq(tpch.lineitem.l_returnflag, "R")
│ │ └─TableScan_39 1.00 cop table:lineitem, keep order:false
│ └─TableReader_33 1.00 root data:TableScan_32
│ └─TableScan_32 1.00 cop table:customer, range: decided by [tpch.orders.o_custkey], keep order:false
Expand Down Expand Up @@ -1232,9 +1232,9 @@ Projection_25 100.00 root tpch.supplier.s_name, 17_col_0
│ │ │ │ ├─TableReader_109 36517371.00 root data:Selection_108
│ │ │ │ │ └─Selection_108 36517371.00 cop eq(tpch.orders.o_orderstatus, "F")
│ │ │ │ │ └─TableScan_107 75000000.00 cop table:orders, range:[-inf,+inf], keep order:false
│ │ │ │ └─IndexLookUp_99 240004648.80 root
│ │ │ │ └─IndexLookUp_99 0.80 root
│ │ │ │ ├─IndexScan_96 1.00 cop table:l1, index:L_ORDERKEY, L_LINENUMBER, range: decided by [tpch.orders.o_orderkey], keep order:false
│ │ │ │ └─Selection_98 240004648.80 cop gt(tpch.l1.l_receiptdate, tpch.l1.l_commitdate)
│ │ │ │ └─Selection_98 0.80 cop gt(tpch.l1.l_receiptdate, tpch.l1.l_commitdate)
│ │ │ │ └─TableScan_97 1.00 cop table:lineitem, keep order:false
│ │ │ └─TableReader_114 500000.00 root data:TableScan_113
│ │ │ └─TableScan_113 500000.00 cop table:supplier, range:[-inf,+inf], keep order:false
Expand All @@ -1244,9 +1244,9 @@ Projection_25 100.00 root tpch.supplier.s_name, 17_col_0
│ └─IndexLookUp_81 1.00 root
│ ├─IndexScan_79 1.00 cop table:l2, index:L_ORDERKEY, L_LINENUMBER, range: decided by [tpch.l1.l_orderkey], keep order:false
│ └─TableScan_80 1.00 cop table:lineitem, keep order:false
└─IndexLookUp_37 240004648.80 root
└─IndexLookUp_37 0.80 root
├─IndexScan_34 1.00 cop table:l3, index:L_ORDERKEY, L_LINENUMBER, range: decided by [tpch.l1.l_orderkey], keep order:false
└─Selection_36 240004648.80 cop gt(tpch.l3.l_receiptdate, tpch.l3.l_commitdate)
└─Selection_36 0.80 cop gt(tpch.l3.l_receiptdate, tpch.l3.l_commitdate)
└─TableScan_35 1.00 cop table:lineitem, keep order:false
/*
Q22 Global Sales Opportunity Query
Expand Down
19 changes: 17 additions & 2 deletions planner/core/exhaust_physical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -549,8 +549,23 @@ func (p *LogicalJoin) constructInnerIndexScan(ds *DataSource, idx *model.IndexIn

is.initSchema(ds.id, idx, cop.tablePlan != nil)
indexConds, tblConds := splitIndexFilterConditions(remainedConds, idx.Columns, ds.tableInfo)
path := &accessPath{indexFilters: indexConds, tableFilters: tblConds, countAfterIndex: math.MaxFloat64}
is.addPushedDownSelection(cop, ds, math.MaxFloat64, path)
path := &accessPath{
indexFilters: indexConds,
tableFilters: tblConds,
countAfterAccess: rowCount,
}
// Assume equal conditions used by index join and other conditions are independent.
if len(indexConds) > 0 {
selectivity, err := ds.tableStats.HistColl.Selectivity(ds.ctx, indexConds)
if err != nil {
logutil.Logger(context.Background()).Debug("calculate selectivity failed, use selection factor", zap.Error(err))
selectivity = selectionFactor
}
path.countAfterIndex = rowCount * selectivity
}
selectivity := ds.stats.RowCount / ds.tableStats.RowCount
finalStats := ds.stats.ScaleByExpectCnt(selectivity * rowCount)
is.addPushedDownSelection(cop, ds, path, finalStats)
t := finishCopTask(ds.ctx, cop)
return t.plan()
}
Expand Down
Loading

0 comments on commit 1b320a1

Please sign in to comment.