From bf37302f7be0791c59fca5bf7be7d21e786fd07a Mon Sep 17 00:00:00 2001 From: Haibin Xie Date: Tue, 19 Feb 2019 11:19:30 +0800 Subject: [PATCH] planner/core: implement skyline pruning (#9337) --- cmd/explaintest/main.go | 5 - .../r/access_path_selection.result | 21 ++ cmd/explaintest/r/explain_complex.result | 17 +- .../r/explain_complex_stats.result | 17 +- cmd/explaintest/r/explain_easy.result | 90 ++++----- cmd/explaintest/r/explain_easy_stats.result | 8 +- cmd/explaintest/r/select.result | 40 ++-- cmd/explaintest/r/topn_push_down.result | 18 +- cmd/explaintest/r/tpch.result | 14 +- cmd/explaintest/t/access_path_selection.test | 21 ++ expression/util.go | 21 ++ go.mod | 1 + go.sum | 6 +- planner/core/find_best_task.go | 185 ++++++++++++++---- planner/core/logical_plan_test.go | 100 ++++++++++ statistics/selectivity_test.go | 8 +- 16 files changed, 417 insertions(+), 155 deletions(-) create mode 100644 cmd/explaintest/r/access_path_selection.result create mode 100644 cmd/explaintest/t/access_path_selection.test diff --git a/cmd/explaintest/main.go b/cmd/explaintest/main.go index d3650f8a05eb1..e896da887546d 100644 --- a/cmd/explaintest/main.go +++ b/cmd/explaintest/main.go @@ -566,11 +566,6 @@ func loadAllTests() ([]string, error) { if strings.HasSuffix(name, ".test") { name = strings.TrimSuffix(name, ".test") - // if we use record and the result file exists, skip generating - if record && resultExists(name) { - continue - } - if create && !strings.HasSuffix(name, "_stats") { continue } diff --git a/cmd/explaintest/r/access_path_selection.result b/cmd/explaintest/r/access_path_selection.result new file mode 100644 index 0000000000000..c3cf9e1386074 --- /dev/null +++ b/cmd/explaintest/r/access_path_selection.result @@ -0,0 +1,21 @@ +CREATE TABLE `outdated_statistics` ( +`a` int, +`b` int, +`c` int, +INDEX idx_a(a), +INDEX idx_ab(a,b) +); +insert into outdated_statistics values (2, 2, 2); +insert into outdated_statistics values (3, 3, 3); +insert into 
outdated_statistics values (4, 4, 4); +analyze table outdated_statistics; +insert into outdated_statistics values (1, 1, 1); +insert into outdated_statistics values (1, 2, 2); +insert into outdated_statistics values (1, 3, 3); +analyze table outdated_statistics index idx_ab; +explain select * from outdated_statistics where a=1 and b=1 and c=1; +id count task operator info +IndexLookUp_11 0.00 root +├─IndexScan_8 1.00 cop table:outdated_statistics, index:a, b, range:[1 1,1 1], keep order:false +└─Selection_10 0.00 cop eq(test.outdated_statistics.c, 1) + └─TableScan_9 1.00 cop table:outdated_statistics, keep order:false diff --git a/cmd/explaintest/r/explain_complex.result b/cmd/explaintest/r/explain_complex.result index c938d68844ad1..6380dd5b5f9f3 100644 --- a/cmd/explaintest/r/explain_complex.result +++ b/cmd/explaintest/r/explain_complex.result @@ -153,9 +153,9 @@ id count task operator info Projection_9 0.00 root dt.id, dt.aid, dt.pt, dt.dic, dt.cm, rr.gid, rr.acd, rr.t, dt.p1, dt.p2, dt.p3, dt.p4, dt.p5, dt.p6_md5, dt.p7_md5 └─Limit_12 0.00 root offset:0, count:2000 └─IndexJoin_18 0.00 root inner join, inner:IndexLookUp_17, outer key:dt.aid, dt.dic, inner key:rr.aid, rr.dic - ├─TableReader_42 0.00 root data:Selection_41 - │ └─Selection_41 0.00 cop eq(dt.bm, 0), eq(dt.pt, "ios"), gt(dt.t, 1478185592) - │ └─TableScan_40 10000.00 cop table:dt, range:[0,+inf], keep order:false, stats:pseudo + ├─TableReader_38 0.00 root data:Selection_37 + │ └─Selection_37 0.00 cop eq(dt.bm, 0), eq(dt.pt, "ios"), gt(dt.t, 1478185592) + │ └─TableScan_36 10000.00 cop table:dt, range:[0,+inf], keep order:false, stats:pseudo └─IndexLookUp_17 3.33 root ├─IndexScan_14 10.00 cop table:rr, index:aid, dic, range: decided by [dt.aid dt.dic], keep order:false, stats:pseudo └─Selection_16 3.33 cop eq(rr.pt, "ios"), gt(rr.t, 1478185592) @@ -164,10 +164,10 @@ explain select pc,cr,count(DISTINCT uid) as pay_users,count(oid) as pay_times,su id count task operator info Projection_5 1.00 root 
test.pp.pc, test.pp.cr, 3_col_0, 3_col_1, 3_col_2 └─HashAgg_7 1.00 root group by:test.pp.cr, test.pp.pc, funcs:count(distinct test.pp.uid), count(test.pp.oid), sum(test.pp.am), firstrow(test.pp.pc), firstrow(test.pp.cr) - └─IndexLookUp_28 0.00 root - ├─IndexScan_25 0.40 cop table:pp, index:uid, pi, range:[18089709 510017,18089709 510017], [18089709 520017,18089709 520017], [18090780 510017,18090780 510017], [18090780 520017,18090780 520017], keep order:false, stats:pseudo - └─Selection_27 0.00 cop eq(test.pp.ps, 2), ge(test.pp.ppt, 1478188800), lt(test.pp.ppt, 1478275200) - └─TableScan_26 0.40 cop table:pp, keep order:false, stats:pseudo + └─IndexLookUp_24 0.00 root + ├─IndexScan_21 0.40 cop table:pp, index:uid, pi, range:[18089709 510017,18089709 510017], [18089709 520017,18089709 520017], [18090780 510017,18090780 510017], [18090780 520017,18090780 520017], keep order:false, stats:pseudo + └─Selection_23 0.00 cop eq(test.pp.ps, 2), ge(test.pp.ppt, 1478188800), lt(test.pp.ppt, 1478275200) + └─TableScan_22 0.40 cop table:pp, keep order:false, stats:pseudo CREATE TABLE `tbl_001` (`a` int, `b` int); CREATE TABLE `tbl_002` (`a` int, `b` int); CREATE TABLE `tbl_003` (`a` int, `b` int); @@ -200,6 +200,3 @@ HashAgg_34 72000.00 root group by:col_1, funcs:sum(col_0) │ └─TableScan_58 10000.00 cop table:tbl_008, range:[-inf,+inf], keep order:false, stats:pseudo └─TableReader_62 10000.00 root data:TableScan_61 └─TableScan_61 10000.00 cop table:tbl_009, range:[-inf,+inf], keep order:false, stats:pseudo - - - diff --git a/cmd/explaintest/r/explain_complex_stats.result b/cmd/explaintest/r/explain_complex_stats.result index 54b78c59a364d..e05f88234f156 100644 --- a/cmd/explaintest/r/explain_complex_stats.result +++ b/cmd/explaintest/r/explain_complex_stats.result @@ -161,9 +161,9 @@ id count task operator info Projection_9 428.32 root dt.id, dt.aid, dt.pt, dt.dic, dt.cm, rr.gid, rr.acd, rr.t, dt.p1, dt.p2, dt.p3, dt.p4, dt.p5, dt.p6_md5, dt.p7_md5 └─Limit_12 428.32 root offset:0, 
count:2000 └─IndexJoin_18 428.32 root inner join, inner:IndexLookUp_17, outer key:dt.aid, dt.dic, inner key:rr.aid, rr.dic - ├─TableReader_42 428.32 root data:Selection_41 - │ └─Selection_41 428.32 cop eq(dt.bm, 0), eq(dt.pt, "ios"), gt(dt.t, 1478185592) - │ └─TableScan_40 2000.00 cop table:dt, range:[0,+inf], keep order:false + ├─TableReader_38 428.32 root data:Selection_37 + │ └─Selection_37 428.32 cop eq(dt.bm, 0), eq(dt.pt, "ios"), gt(dt.t, 1478185592) + │ └─TableScan_36 2000.00 cop table:dt, range:[0,+inf], keep order:false └─IndexLookUp_17 970.00 root ├─IndexScan_14 1.00 cop table:rr, index:aid, dic, range: decided by [dt.aid dt.dic], keep order:false └─Selection_16 970.00 cop eq(rr.pt, "ios"), gt(rr.t, 1478185592) @@ -172,10 +172,10 @@ explain select pc,cr,count(DISTINCT uid) as pay_users,count(oid) as pay_times,su id count task operator info Projection_5 207.86 root test.pp.pc, test.pp.cr, 3_col_0, 3_col_1, 3_col_2 └─HashAgg_7 207.86 root group by:test.pp.cr, test.pp.pc, funcs:count(distinct test.pp.uid), count(test.pp.oid), sum(test.pp.am), firstrow(test.pp.pc), firstrow(test.pp.cr) - └─IndexLookUp_28 207.86 root - ├─IndexScan_22 627.00 cop table:pp, index:ps, range:[2,2], keep order:false - └─Selection_24 207.86 cop ge(test.pp.ppt, 1478188800), in(test.pp.pi, 510017, 520017), in(test.pp.uid, 18089709, 18090780), lt(test.pp.ppt, 1478275200) - └─TableScan_23 627.00 cop table:pp, keep order:false + └─IndexLookUp_24 207.86 root + ├─IndexScan_18 627.00 cop table:pp, index:ps, range:[2,2], keep order:false + └─Selection_20 207.86 cop ge(test.pp.ppt, 1478188800), in(test.pp.pi, 510017, 520017), in(test.pp.uid, 18089709, 18090780), lt(test.pp.ppt, 1478275200) + └─TableScan_19 627.00 cop table:pp, keep order:false drop table if exists tbl_001; CREATE TABLE tbl_001 (a int, b int); load stats 's/explain_complex_stats_tbl_001.json'; @@ -226,6 +226,3 @@ HashAgg_34 18000.00 root group by:col_1, funcs:sum(col_0) │ └─TableScan_58 2000.00 cop table:tbl_008, 
range:[-inf,+inf], keep order:false └─TableReader_62 2000.00 root data:TableScan_61 └─TableScan_61 2000.00 cop table:tbl_009, range:[-inf,+inf], keep order:false - - - diff --git a/cmd/explaintest/r/explain_easy.result b/cmd/explaintest/r/explain_easy.result index 435cfa9350861..0d6b9da8c17f5 100644 --- a/cmd/explaintest/r/explain_easy.result +++ b/cmd/explaintest/r/explain_easy.result @@ -38,8 +38,8 @@ TableReader_6 3333.33 root data:TableScan_5 └─TableScan_5 3333.33 cop table:t1, range:(0,+inf], keep order:false, stats:pseudo explain select t1.c1, t1.c2 from t1 where t1.c2 = 1; id count task operator info -IndexReader_9 10.00 root index:IndexScan_8 -└─IndexScan_8 10.00 cop table:t1, index:c2, range:[1,1], keep order:false, stats:pseudo +IndexReader_6 10.00 root index:IndexScan_5 +└─IndexScan_5 10.00 cop table:t1, index:c2, range:[1,1], keep order:false, stats:pseudo explain select * from t1 left join t2 on t1.c2 = t2.c1 where t1.c1 > 1; id count task operator info IndexJoin_11 4166.67 root left outer join, inner:IndexLookUp_10, outer key:test.t1.c2, inner key:test.t2.c1 @@ -101,10 +101,10 @@ Projection_13 10000.00 root k └─MergeJoin_15 10000.00 root left outer join, left key:test.t1.c1, right key:s.c1 ├─TableReader_18 10000.00 root data:TableScan_17 │ └─TableScan_17 10000.00 cop table:t1, range:[-inf,+inf], keep order:true, stats:pseudo - └─Selection_20 8000.00 root ne(k, 0) - └─Projection_21 10000.00 root 1, s.c1 - └─TableReader_23 10000.00 root data:TableScan_22 - └─TableScan_22 10000.00 cop table:s, range:[-inf,+inf], keep order:true, stats:pseudo + └─Selection_19 8000.00 root ne(k, 0) + └─Projection_20 10000.00 root 1, s.c1 + └─TableReader_22 10000.00 root data:TableScan_21 + └─TableScan_21 10000.00 cop table:s, range:[-inf,+inf], keep order:true, stats:pseudo explain select * from information_schema.columns; id count task operator info MemTableScan_4 10000.00 root @@ -122,8 +122,8 @@ Projection_12 10000.00 root eq(test.t1.c2, test.t2.c2) explain select * 
from t1 order by c1 desc limit 1; id count task operator info Limit_10 1.00 root offset:0, count:1 -└─TableReader_21 1.00 root data:Limit_20 - └─Limit_20 1.00 cop offset:0, count:1 +└─TableReader_20 1.00 root data:Limit_19 + └─Limit_19 1.00 cop offset:0, count:1 └─TableScan_18 1.00 cop table:t1, range:[-inf,+inf], keep order:true, desc, stats:pseudo explain select * from t4 use index(idx) where a > 1 and b > 1 and c > 1 limit 1; id count task operator info @@ -137,8 +137,8 @@ Limit_9 1.00 root offset:0, count:1 explain select * from t4 where a > 1 and c > 1 limit 1; id count task operator info Limit_8 1.00 root offset:0, count:1 -└─TableReader_15 1.00 root data:Limit_14 - └─Limit_14 1.00 cop offset:0, count:1 +└─TableReader_14 1.00 root data:Limit_13 + └─Limit_13 1.00 cop offset:0, count:1 └─Selection_12 1.00 cop gt(test.t4.c, 1) └─TableScan_11 3.00 cop table:t4, range:(1,+inf], keep order:false, stats:pseudo explain select ifnull(null, t1.c1) from t1; @@ -154,42 +154,42 @@ id count task operator info Union_17 26000.00 root ├─HashAgg_21 16000.00 root group by:c1, funcs:firstrow(join_agg_0) │ └─Union_22 16000.00 root -│ ├─StreamAgg_35 8000.00 root group by:col_2, funcs:firstrow(col_0), firstrow(col_1) -│ │ └─IndexReader_36 8000.00 root index:StreamAgg_26 +│ ├─StreamAgg_34 8000.00 root group by:col_2, funcs:firstrow(col_0), firstrow(col_1) +│ │ └─IndexReader_35 8000.00 root index:StreamAgg_26 │ │ └─StreamAgg_26 8000.00 cop group by:test.t2.c1, funcs:firstrow(test.t2.c1), firstrow(test.t2.c1) -│ │ └─IndexScan_34 10000.00 cop table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo -│ └─StreamAgg_52 8000.00 root group by:col_2, funcs:firstrow(col_0), firstrow(col_1) -│ └─IndexReader_53 8000.00 root index:StreamAgg_43 -│ └─StreamAgg_43 8000.00 cop group by:test.t2.c1, funcs:firstrow(test.t2.c1), firstrow(test.t2.c1) -│ └─IndexScan_51 10000.00 cop table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo -└─TableReader_59 10000.00 root 
data:TableScan_58 - └─TableScan_58 10000.00 cop table:t2, range:[-inf,+inf], keep order:false, stats:pseudo +│ │ └─IndexScan_33 10000.00 cop table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo +│ └─StreamAgg_49 8000.00 root group by:col_2, funcs:firstrow(col_0), firstrow(col_1) +│ └─IndexReader_50 8000.00 root index:StreamAgg_41 +│ └─StreamAgg_41 8000.00 cop group by:test.t2.c1, funcs:firstrow(test.t2.c1), firstrow(test.t2.c1) +│ └─IndexScan_48 10000.00 cop table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo +└─TableReader_55 10000.00 root data:TableScan_54 + └─TableScan_54 10000.00 cop table:t2, range:[-inf,+inf], keep order:false, stats:pseudo explain select c1 from t2 union all select c1 from t2 union select c1 from t2; id count task operator info HashAgg_18 24000.00 root group by:c1, funcs:firstrow(join_agg_0) └─Union_19 24000.00 root - ├─StreamAgg_32 8000.00 root group by:col_2, funcs:firstrow(col_0), firstrow(col_1) - │ └─IndexReader_33 8000.00 root index:StreamAgg_23 + ├─StreamAgg_31 8000.00 root group by:col_2, funcs:firstrow(col_0), firstrow(col_1) + │ └─IndexReader_32 8000.00 root index:StreamAgg_23 │ └─StreamAgg_23 8000.00 cop group by:test.t2.c1, funcs:firstrow(test.t2.c1), firstrow(test.t2.c1) - │ └─IndexScan_31 10000.00 cop table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo - ├─StreamAgg_49 8000.00 root group by:col_2, funcs:firstrow(col_0), firstrow(col_1) - │ └─IndexReader_50 8000.00 root index:StreamAgg_40 - │ └─StreamAgg_40 8000.00 cop group by:test.t2.c1, funcs:firstrow(test.t2.c1), firstrow(test.t2.c1) - │ └─IndexScan_48 10000.00 cop table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo - └─StreamAgg_66 8000.00 root group by:col_2, funcs:firstrow(col_0), firstrow(col_1) - └─IndexReader_67 8000.00 root index:StreamAgg_57 - └─StreamAgg_57 8000.00 cop group by:test.t2.c1, funcs:firstrow(test.t2.c1), firstrow(test.t2.c1) - └─IndexScan_65 10000.00 cop table:t2, index:c1, 
range:[NULL,+inf], keep order:true, stats:pseudo + │ └─IndexScan_30 10000.00 cop table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo + ├─StreamAgg_46 8000.00 root group by:col_2, funcs:firstrow(col_0), firstrow(col_1) + │ └─IndexReader_47 8000.00 root index:StreamAgg_38 + │ └─StreamAgg_38 8000.00 cop group by:test.t2.c1, funcs:firstrow(test.t2.c1), firstrow(test.t2.c1) + │ └─IndexScan_45 10000.00 cop table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo + └─StreamAgg_61 8000.00 root group by:col_2, funcs:firstrow(col_0), firstrow(col_1) + └─IndexReader_62 8000.00 root index:StreamAgg_53 + └─StreamAgg_53 8000.00 cop group by:test.t2.c1, funcs:firstrow(test.t2.c1), firstrow(test.t2.c1) + └─IndexScan_60 10000.00 cop table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo set @@session.tidb_opt_insubquery_unfold = 0; explain select sum(t1.c1 in (select c1 from t2)) from t1; id count task operator info StreamAgg_12 1.00 root funcs:sum(col_0) -└─Projection_35 10000.00 root cast(5_aux_0) - └─MergeJoin_28 10000.00 root left outer semi join, left key:test.t1.c1, right key:test.t2.c1 +└─Projection_33 10000.00 root cast(5_aux_0) + └─MergeJoin_26 10000.00 root left outer semi join, left key:test.t1.c1, right key:test.t2.c1 ├─TableReader_19 10000.00 root data:TableScan_18 │ └─TableScan_18 10000.00 cop table:t1, range:[-inf,+inf], keep order:true, stats:pseudo - └─IndexReader_23 10000.00 root index:IndexScan_22 - └─IndexScan_22 10000.00 cop table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo + └─IndexReader_21 10000.00 root index:IndexScan_20 + └─IndexScan_20 10000.00 cop table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo explain select 1 in (select c2 from t2) from t1; id count task operator info Projection_6 10000.00 root 5_aux_0 @@ -217,10 +217,10 @@ subgraph cluster12{ node [style=filled, color=lightgrey] color=black label = "root" -"StreamAgg_12" -> "Projection_35" -"Projection_35" -> 
"MergeJoin_28" -"MergeJoin_28" -> "TableReader_19" -"MergeJoin_28" -> "IndexReader_23" +"StreamAgg_12" -> "Projection_33" +"Projection_33" -> "MergeJoin_26" +"MergeJoin_26" -> "TableReader_19" +"MergeJoin_26" -> "IndexReader_21" } subgraph cluster18{ node [style=filled, color=lightgrey] @@ -228,14 +228,14 @@ color=black label = "cop" "TableScan_18" } -subgraph cluster22{ +subgraph cluster20{ node [style=filled, color=lightgrey] color=black label = "cop" -"IndexScan_22" +"IndexScan_20" } "TableReader_19" -> "TableScan_18" -"IndexReader_23" -> "IndexScan_22" +"IndexReader_21" -> "IndexScan_20" } explain format="dot" select 1 in (select c2 from t2) from t1; @@ -333,8 +333,8 @@ drop table if exists t; create table t(a bigint, b bigint, index idx(a, b)); explain select * from t where a in (1, 2) and a in (1, 3); id count task operator info -IndexReader_9 10.00 root index:IndexScan_8 -└─IndexScan_8 10.00 cop table:t, index:a, b, range:[1,1], keep order:false, stats:pseudo +IndexReader_6 10.00 root index:IndexScan_5 +└─IndexScan_5 10.00 cop table:t, index:a, b, range:[1,1], keep order:false, stats:pseudo explain select * from t where b in (1, 2) and b in (1, 3); id count task operator info TableReader_7 10.00 root data:Selection_6 @@ -342,8 +342,8 @@ TableReader_7 10.00 root data:Selection_6 └─TableScan_5 10000.00 cop table:t, range:[-inf,+inf], keep order:false, stats:pseudo explain select * from t where a = 1 and a = 1; id count task operator info -IndexReader_9 10.00 root index:IndexScan_8 -└─IndexScan_8 10.00 cop table:t, index:a, b, range:[1,1], keep order:false, stats:pseudo +IndexReader_6 10.00 root index:IndexScan_5 +└─IndexScan_5 10.00 cop table:t, index:a, b, range:[1,1], keep order:false, stats:pseudo explain select * from t where a = 1 and a = 2; id count task operator info TableDual_5 0.00 root rows:0 diff --git a/cmd/explaintest/r/explain_easy_stats.result b/cmd/explaintest/r/explain_easy_stats.result index 73cbf7877b685..3aaea611ad530 100644 --- 
a/cmd/explaintest/r/explain_easy_stats.result +++ b/cmd/explaintest/r/explain_easy_stats.result @@ -41,8 +41,8 @@ TableReader_6 1999.00 root data:TableScan_5 └─TableScan_5 1999.00 cop table:t1, range:(0,+inf], keep order:false explain select t1.c1, t1.c2 from t1 where t1.c2 = 1; id count task operator info -IndexReader_9 0.00 root index:IndexScan_8 -└─IndexScan_8 0.00 cop table:t1, index:c2, range:[1,1], keep order:false +IndexReader_6 0.00 root index:IndexScan_5 +└─IndexScan_5 0.00 cop table:t1, index:c2, range:[1,1], keep order:false explain select * from t1 left join t2 on t1.c2 = t2.c1 where t1.c1 > 1; id count task operator info Projection_6 2481.25 root test.t1.c1, test.t1.c2, test.t1.c3, test.t2.c1, test.t2.c2 @@ -110,8 +110,8 @@ Projection_12 1999.00 root eq(test.t1.c2, test.t2.c2) explain select * from t1 order by c1 desc limit 1; id count task operator info Limit_10 1.00 root offset:0, count:1 -└─TableReader_21 1.00 root data:Limit_20 - └─Limit_20 1.00 cop offset:0, count:1 +└─TableReader_20 1.00 root data:Limit_19 + └─Limit_19 1.00 cop offset:0, count:1 └─TableScan_18 1.00 cop table:t1, range:[-inf,+inf], keep order:true, desc set @@session.tidb_opt_insubquery_unfold = 0; explain select 1 in (select c2 from t2) from t1; diff --git a/cmd/explaintest/r/select.result b/cmd/explaintest/r/select.result index de524a7de28b0..9f3926269dff7 100644 --- a/cmd/explaintest/r/select.result +++ b/cmd/explaintest/r/select.result @@ -249,30 +249,30 @@ insert t values(0,0,0); explain select distinct b from t group by a; id count task operator info HashAgg_7 8000.00 root group by:test.t.b, funcs:firstrow(test.t.b) -└─StreamAgg_20 8000.00 root group by:col_1, funcs:firstrow(col_0) - └─IndexReader_21 8000.00 root index:StreamAgg_11 +└─StreamAgg_19 8000.00 root group by:col_1, funcs:firstrow(col_0) + └─IndexReader_20 8000.00 root index:StreamAgg_11 └─StreamAgg_11 8000.00 cop group by:test.t.a, funcs:firstrow(test.t.b) - └─IndexScan_19 10000.00 cop table:t, index:a, b, c, 
range:[NULL,+inf], keep order:true, stats:pseudo + └─IndexScan_18 10000.00 cop table:t, index:a, b, c, range:[NULL,+inf], keep order:true, stats:pseudo select distinct b from t group by a; b 0 explain select count(b) from t group by a; id count task operator info -StreamAgg_17 8000.00 root group by:col_1, funcs:count(col_0) -└─IndexReader_18 8000.00 root index:StreamAgg_8 +StreamAgg_16 8000.00 root group by:col_1, funcs:count(col_0) +└─IndexReader_17 8000.00 root index:StreamAgg_8 └─StreamAgg_8 8000.00 cop group by:test.t.a, funcs:count(test.t.b) - └─IndexScan_16 10000.00 cop table:t, index:a, b, c, range:[NULL,+inf], keep order:true, stats:pseudo + └─IndexScan_15 10000.00 cop table:t, index:a, b, c, range:[NULL,+inf], keep order:true, stats:pseudo select count(b) from t group by a; count(b) 1 insert t values(1,1,1),(3,3,6),(3,2,5),(2,1,4),(1,1,3),(1,1,2); explain select count(a) from t where b>0 group by a, b; id count task operator info -StreamAgg_21 2666.67 root group by:col_1, col_2, funcs:count(col_0) -└─IndexReader_22 2666.67 root index:StreamAgg_9 +StreamAgg_20 2666.67 root group by:col_1, col_2, funcs:count(col_0) +└─IndexReader_21 2666.67 root index:StreamAgg_9 └─StreamAgg_9 2666.67 cop group by:test.t.a, test.t.b, funcs:count(test.t.a) - └─Selection_20 3333.33 cop gt(test.t.b, 0) - └─IndexScan_19 10000.00 cop table:t, index:a, b, c, range:[NULL,+inf], keep order:true, stats:pseudo + └─Selection_19 3333.33 cop gt(test.t.b, 0) + └─IndexScan_18 10000.00 cop table:t, index:a, b, c, range:[NULL,+inf], keep order:true, stats:pseudo select count(a) from t where b>0 group by a, b; count(a) 3 @@ -282,11 +282,11 @@ count(a) explain select count(a) from t where b>0 group by a, b order by a; id count task operator info Projection_7 2666.67 root count(a) -└─StreamAgg_33 2666.67 root group by:col_2, col_3, funcs:count(col_0), firstrow(col_1) - └─IndexReader_34 2666.67 root index:StreamAgg_31 - └─StreamAgg_31 2666.67 cop group by:test.t.a, test.t.b, 
funcs:count(test.t.a), firstrow(test.t.a) - └─Selection_24 3333.33 cop gt(test.t.b, 0) - └─IndexScan_23 10000.00 cop table:t, index:a, b, c, range:[NULL,+inf], keep order:true, stats:pseudo +└─StreamAgg_31 2666.67 root group by:col_2, col_3, funcs:count(col_0), firstrow(col_1) + └─IndexReader_32 2666.67 root index:StreamAgg_29 + └─StreamAgg_29 2666.67 cop group by:test.t.a, test.t.b, funcs:count(test.t.a), firstrow(test.t.a) + └─Selection_23 3333.33 cop gt(test.t.b, 0) + └─IndexScan_22 10000.00 cop table:t, index:a, b, c, range:[NULL,+inf], keep order:true, stats:pseudo select count(a) from t where b>0 group by a, b order by a; count(a) 3 @@ -297,11 +297,11 @@ explain select count(a) from t where b>0 group by a, b order by a limit 1; id count task operator info Projection_9 1.00 root count(a) └─Limit_15 1.00 root offset:0, count:1 - └─StreamAgg_42 1.00 root group by:col_2, col_3, funcs:count(col_0), firstrow(col_1) - └─IndexReader_43 1.00 root index:StreamAgg_37 - └─StreamAgg_37 1.00 cop group by:test.t.a, test.t.b, funcs:count(test.t.a), firstrow(test.t.a) - └─Selection_41 1.25 cop gt(test.t.b, 0) - └─IndexScan_40 3.75 cop table:t, index:a, b, c, range:[NULL,+inf], keep order:true, stats:pseudo + └─StreamAgg_39 1.00 root group by:col_2, col_3, funcs:count(col_0), firstrow(col_1) + └─IndexReader_40 1.00 root index:StreamAgg_35 + └─StreamAgg_35 1.00 cop group by:test.t.a, test.t.b, funcs:count(test.t.a), firstrow(test.t.a) + └─Selection_38 1.25 cop gt(test.t.b, 0) + └─IndexScan_37 3.75 cop table:t, index:a, b, c, range:[NULL,+inf], keep order:true, stats:pseudo select count(a) from t where b>0 group by a, b order by a limit 1; count(a) 3 diff --git a/cmd/explaintest/r/topn_push_down.result b/cmd/explaintest/r/topn_push_down.result index fca50edb85a4d..f91277a5a774e 100644 --- a/cmd/explaintest/r/topn_push_down.result +++ b/cmd/explaintest/r/topn_push_down.result @@ -169,20 +169,20 @@ LIMIT 0, 5; id count task operator info Projection_12 0.00 root te.expect_time 
└─Limit_18 0.00 root offset:0, count:5 - └─IndexJoin_136 0.00 root left outer join, inner:IndexReader_135, outer key:tr.id, inner key:p.relate_id - ├─TopN_139 0.00 root te.expect_time:asc, offset:0, count:5 + └─IndexJoin_100 0.00 root left outer join, inner:IndexReader_99, outer key:tr.id, inner key:p.relate_id + ├─TopN_103 0.00 root te.expect_time:asc, offset:0, count:5 │ └─IndexJoin_34 0.00 root inner join, inner:IndexLookUp_33, outer key:tr.id, inner key:te.trade_id - │ ├─IndexLookUp_104 0.00 root - │ │ ├─Selection_102 0.00 cop eq(tr.business_type, 18), in(tr.trade_type, 1) - │ │ │ └─IndexScan_100 10.00 cop table:tr, index:shop_identy, trade_status, business_type, trade_pay_status, trade_type, delivery_type, source, biz_date, range:[810094178,810094178], keep order:false, stats:pseudo - │ │ └─Selection_103 0.00 cop eq(tr.brand_identy, 32314), eq(tr.domain_type, 2) - │ │ └─TableScan_101 0.00 cop table:tr, keep order:false + │ ├─IndexLookUp_82 0.00 root + │ │ ├─Selection_80 0.00 cop eq(tr.business_type, 18), in(tr.trade_type, 1) + │ │ │ └─IndexScan_78 10.00 cop table:tr, index:shop_identy, trade_status, business_type, trade_pay_status, trade_type, delivery_type, source, biz_date, range:[810094178,810094178], keep order:false, stats:pseudo + │ │ └─Selection_81 0.00 cop eq(tr.brand_identy, 32314), eq(tr.domain_type, 2) + │ │ └─TableScan_79 0.00 cop table:tr, keep order:false │ └─IndexLookUp_33 250.00 root │ ├─IndexScan_30 10.00 cop table:te, index:trade_id, range: decided by [tr.id], keep order:false, stats:pseudo │ └─Selection_32 250.00 cop ge(te.expect_time, 2018-04-23 00:00:00.000000), le(te.expect_time, 2018-04-23 23:59:59.000000) │ └─TableScan_31 10.00 cop table:te, keep order:false, stats:pseudo - └─IndexReader_135 10.00 root index:IndexScan_134 - └─IndexScan_134 10.00 cop table:p, index:relate_id, range: decided by [tr.id], keep order:false, stats:pseudo + └─IndexReader_99 10.00 root index:IndexScan_98 + └─IndexScan_98 10.00 cop table:p, index:relate_id, 
range: decided by [tr.id], keep order:false, stats:pseudo desc select 1 as a from dual order by a limit 1; id count task operator info Projection_7 1.00 root 1 diff --git a/cmd/explaintest/r/tpch.result b/cmd/explaintest/r/tpch.result index d86c8651e287f..a1235c46b6155 100644 --- a/cmd/explaintest/r/tpch.result +++ b/cmd/explaintest/r/tpch.result @@ -932,16 +932,16 @@ id count task operator info Sort_13 14.41 root supplier_cnt:desc, tpch.part.p_brand:asc, tpch.part.p_type:asc, tpch.part.p_size:asc └─Projection_14 14.41 root tpch.part.p_brand, tpch.part.p_type, tpch.part.p_size, 9_col_0 └─HashAgg_17 14.41 root group by:tpch.part.p_brand, tpch.part.p_size, tpch.part.p_type, funcs:count(distinct tpch.partsupp.ps_suppkey), firstrow(tpch.part.p_brand), firstrow(tpch.part.p_type), firstrow(tpch.part.p_size) - └─HashLeftJoin_22 3863988.24 root anti semi join, inner:TableReader_46, equal:[eq(tpch.partsupp.ps_suppkey, tpch.supplier.s_suppkey)] + └─HashLeftJoin_22 3863988.24 root anti semi join, inner:TableReader_45, equal:[eq(tpch.partsupp.ps_suppkey, tpch.supplier.s_suppkey)] ├─IndexJoin_26 4829985.30 root inner join, inner:IndexReader_25, outer key:tpch.part.p_partkey, inner key:tpch.partsupp.ps_partkey - │ ├─TableReader_41 1200618.43 root data:Selection_40 - │ │ └─Selection_40 1200618.43 cop in(tpch.part.p_size, 48, 19, 12, 4, 41, 7, 21, 39), ne(tpch.part.p_brand, "Brand#34"), not(like(tpch.part.p_type, "LARGE BRUSHED%", 92)) - │ │ └─TableScan_39 10000000.00 cop table:part, range:[-inf,+inf], keep order:false + │ ├─TableReader_40 1200618.43 root data:Selection_39 + │ │ └─Selection_39 1200618.43 cop in(tpch.part.p_size, 48, 19, 12, 4, 41, 7, 21, 39), ne(tpch.part.p_brand, "Brand#34"), not(like(tpch.part.p_type, "LARGE BRUSHED%", 92)) + │ │ └─TableScan_38 10000000.00 cop table:part, range:[-inf,+inf], keep order:false │ └─IndexReader_25 1.00 root index:IndexScan_24 │ └─IndexScan_24 1.00 cop table:partsupp, index:PS_PARTKEY, PS_SUPPKEY, range: decided by 
[tpch.part.p_partkey], keep order:false - └─TableReader_46 400000.00 root data:Selection_45 - └─Selection_45 400000.00 cop like(tpch.supplier.s_comment, "%Customer%Complaints%", 92) - └─TableScan_44 500000.00 cop table:supplier, range:[-inf,+inf], keep order:false + └─TableReader_45 400000.00 root data:Selection_44 + └─Selection_44 400000.00 cop like(tpch.supplier.s_comment, "%Customer%Complaints%", 92) + └─TableScan_43 500000.00 cop table:supplier, range:[-inf,+inf], keep order:false /* Q17 Small-Quantity-Order Revenue Query This query determines how much average yearly revenue would be lost if orders were no longer filled for small diff --git a/cmd/explaintest/t/access_path_selection.test b/cmd/explaintest/t/access_path_selection.test new file mode 100644 index 0000000000000..9104bd8297ac1 --- /dev/null +++ b/cmd/explaintest/t/access_path_selection.test @@ -0,0 +1,21 @@ +CREATE TABLE `outdated_statistics` ( +`a` int, +`b` int, +`c` int, +INDEX idx_a(a), +INDEX idx_ab(a,b) +); +insert into outdated_statistics values (2, 2, 2); +insert into outdated_statistics values (3, 3, 3); +insert into outdated_statistics values (4, 4, 4); +analyze table outdated_statistics; +insert into outdated_statistics values (1, 1, 1); +insert into outdated_statistics values (1, 2, 2); +insert into outdated_statistics values (1, 3, 3); +# Only update idx_ab and leave idx_a outdated. +# Then the estimated number of rows on idx_ab is 1 while it's 0 on idx_a. +# But for this query, idx_ab is always better than idx_a, +# because idx_a can't take column b into account while idx_ab can. +# This wrong case is solved by Skyline Pruning. 
+analyze table outdated_statistics index idx_ab; +explain select * from outdated_statistics where a=1 and b=1 and c=1; diff --git a/expression/util.go b/expression/util.go index b4fbfe9adb86e..36ada315874f3 100644 --- a/expression/util.go +++ b/expression/util.go @@ -28,6 +28,7 @@ import ( "github.com/pingcap/tidb/types" "github.com/pingcap/tidb/util/chunk" "github.com/pingcap/tidb/util/hack" + "golang.org/x/tools/container/intsets" ) // Filter the input expressions, append the results to result. @@ -92,6 +93,26 @@ func extractColumns(result []*Column, expr Expression, filter func(*Column) bool return result } +// ExtractColumnSet extracts the different values of `UniqueId` for columns in expressions. +func ExtractColumnSet(exprs []Expression) *intsets.Sparse { + set := &intsets.Sparse{} + for _, expr := range exprs { + extractColumnSet(expr, set) + } + return set +} + +func extractColumnSet(expr Expression, set *intsets.Sparse) { + switch v := expr.(type) { + case *Column: + set.Insert(int(v.UniqueID)) + case *ScalarFunction: + for _, arg := range v.GetArgs() { + extractColumnSet(arg, set) + } + } +} + // ColumnSubstitute substitutes the columns in filter to expressions in select fields. // e.g. select * from (select b as a from t) k where a < 10 => select * from (select b as a from t where b < 10) k. 
func ColumnSubstitute(expr Expression, schema *Schema, newExprs []Expression) Expression { diff --git a/go.mod b/go.mod index 1aee0455f61b1..bd6bc2dbf0957 100644 --- a/go.mod +++ b/go.mod @@ -73,6 +73,7 @@ require ( golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e // indirect golang.org/x/text v0.3.0 golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2 // indirect + golang.org/x/tools v0.0.0-20181105230042-78dc5bac0cac google.golang.org/genproto v0.0.0-20180427144745-86e600f69ee4 // indirect google.golang.org/grpc v1.12.0 gopkg.in/fsnotify.v1 v1.4.7 // indirect diff --git a/go.sum b/go.sum index dbf99fee9ebd6..ce2691fdcb146 100644 --- a/go.sum +++ b/go.sum @@ -92,10 +92,6 @@ github.com/pingcap/goleveldb v0.0.0-20171020084629-8d44bfdf1030 h1:XJLuW0lsP7vAt github.com/pingcap/goleveldb v0.0.0-20171020084629-8d44bfdf1030/go.mod h1:O17XtbryoCJhkKGbT62+L2OlrniwqiGLSqrmdHCMzZw= github.com/pingcap/kvproto v0.0.0-20181109035735-8e3f33ac4929 h1:NAq95+VGsS2G7SjzZ5LP9iUlCMNAs13QUzbNY3G90v8= github.com/pingcap/kvproto v0.0.0-20181109035735-8e3f33ac4929/go.mod h1:0gwbe1F2iBIjuQ9AH0DbQhL+Dpr5GofU8fgYyXk+ykk= -github.com/pingcap/parser v0.0.0-20190118033454-a52e5bde3bd2 h1:7YGx4hF6M0nlFJVZrLF3EbMRI+XOizL+9aB8Txe745U= -github.com/pingcap/parser v0.0.0-20190118033454-a52e5bde3bd2/go.mod h1:1FNvfp9+J0wvc4kl8eGNh7Rqrxveg15jJoWo/a0uHwA= -github.com/pingcap/parser v0.0.0-20190214121452-6d10a0b75f3e h1:tfl2np1PRmAQQoHjeyzy1qYY62RD6camcHf6wVjQ19M= -github.com/pingcap/parser v0.0.0-20190214121452-6d10a0b75f3e/go.mod h1:1FNvfp9+J0wvc4kl8eGNh7Rqrxveg15jJoWo/a0uHwA= github.com/pingcap/parser v0.0.0-20190218023123-90a796aef0c5 h1:0sHmsTSdOkatWgUbz1bWDz57z4c1drsR2aAXpEgK6Co= github.com/pingcap/parser v0.0.0-20190218023123-90a796aef0c5/go.mod h1:1FNvfp9+J0wvc4kl8eGNh7Rqrxveg15jJoWo/a0uHwA= github.com/pingcap/pd v2.1.0-rc.4+incompatible h1:/buwGk04aHO5odk/+O8ZOXGs4qkUjYTJ2UpCJXna8NE= @@ -146,6 +142,8 @@ golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg= golang.org/x/text 
v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2 h1:+DCIGbF/swA92ohVg0//6X2IVY3KZs6p9mix0ziNYJM= golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/tools v0.0.0-20181105230042-78dc5bac0cac h1:0Nb35Izc6T6Yz1iGmRc4cg14cxRaFjbjD4hWFI6JNJ8= +golang.org/x/tools v0.0.0-20181105230042-78dc5bac0cac/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= google.golang.org/genproto v0.0.0-20180427144745-86e600f69ee4 h1:0rk3/gV3HbvCeUzVMhdxV3TEVKMVPDnayjN7sYRmcxY= google.golang.org/genproto v0.0.0-20180427144745-86e600f69ee4/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/grpc v1.12.0 h1:Mm8atZtkT+P6R43n/dqNDWkPPu5BwRVu/1rJnJCeZH8= diff --git a/planner/core/find_best_task.go b/planner/core/find_best_task.go index 8d9ef2affc5c3..ed8203a0c0026 100644 --- a/planner/core/find_best_task.go +++ b/planner/core/find_best_task.go @@ -24,6 +24,7 @@ import ( "github.com/pingcap/tidb/planner/property" "github.com/pingcap/tidb/types" "github.com/pingcap/tidb/util/chunk" + "golang.org/x/tools/container/intsets" ) const ( @@ -196,6 +197,134 @@ func (ds *DataSource) tryToGetDualTask() (task, error) { return nil, nil } +// candidatePath is used to maintain required info for skyline pruning. +type candidatePath struct { + path *accessPath + columnSet *intsets.Sparse // columnSet is the set of columns that occurred in the access conditions. + isSingleScan bool + isMatchProp bool +} + +// compareColumnSet will compares the two set. The last return value is used to indicate +// if they are comparable, it is false when both two sets have columns that do not occur in the other. +// When the second return value is true, the value of first: +// (1) -1 means that `l` is a strict subset of `r`; +// (2) 0 means that `l` equals to `r`; +// (3) 1 means that `l` is a strict superset of `r`. 
+func compareColumnSet(l, r *intsets.Sparse) (int, bool) { + lLen, rLen := l.Len(), r.Len() + if lLen < rLen { + // -1 is meaningful only when l.SubsetOf(r) is true. + return -1, l.SubsetOf(r) + } + if lLen == rLen { + // 0 is meaningful only when l.SubsetOf(r) is true. + return 0, l.SubsetOf(r) + } + // 1 is meaningful only when r.SubsetOf(l) is true. + return 1, r.SubsetOf(l) +} + +func compareBool(l, r bool) int { + if l == r { + return 0 + } + if l == false { + return -1 + } + return 1 +} + +// compareCandidates is the core of skyline pruning. It compares the two candidate paths on three dimensions: +// (1): the set of columns that occurred in the access condition, +// (2): whether or not it matches the physical property +// (3): does it require a double scan. +// If `x` is not worse than `y` at all factors, +// and there exists one factor that `x` is better than `y`, then `x` is better than `y`. +func compareCandidates(lhs, rhs *candidatePath) int { + setsResult, comparable := compareColumnSet(lhs.columnSet, rhs.columnSet) + if !comparable { + return 0 + } + scanResult := compareBool(lhs.isSingleScan, rhs.isSingleScan) + matchResult := compareBool(lhs.isMatchProp, rhs.isMatchProp) + sum := setsResult + scanResult + matchResult + if setsResult >= 0 && scanResult >= 0 && matchResult >= 0 && sum > 0 { + return 1 + } + if setsResult <= 0 && scanResult <= 0 && matchResult <= 0 && sum < 0 { + return -1 + } + return 0 +} + +func (ds *DataSource) getTableCandidate(path *accessPath, prop *property.PhysicalProperty) *candidatePath { + candidate := &candidatePath{path: path} + pkCol := ds.getPKIsHandleCol() + candidate.isMatchProp = len(prop.Cols) == 1 && pkCol != nil && prop.Cols[0].Equal(nil, pkCol) + candidate.columnSet = expression.ExtractColumnSet(path.accessConds) + candidate.isSingleScan = true + return candidate +} + +func (ds *DataSource) getIndexCandidate(path *accessPath, prop *property.PhysicalProperty) *candidatePath { + candidate := &candidatePath{path: 
path} + // When the prop is empty, it is better to leave `isMatchProp` as `false`, because + // the index scan does not need to keep order in that case. + if !prop.IsEmpty() { + for i, col := range path.index.Columns { + if col.Name.L == prop.Cols[0].ColName.L { + candidate.isMatchProp = matchIndicesProp(path.index.Columns[i:], prop.Cols) + break + } else if i >= path.eqCondCount { + break + } + } + } + candidate.columnSet = expression.ExtractColumnSet(path.accessConds) + candidate.isSingleScan = isCoveringIndex(ds.schema.Columns, path.index.Columns, ds.tableInfo.PKIsHandle) + return candidate +} + +// skylinePruning prunes access paths according to different factors. An access path can be pruned only if +// there exists a path that is not worse than it at all factors and there is at least one better factor. +func (ds *DataSource) skylinePruning(prop *property.PhysicalProperty) []*candidatePath { + candidates := make([]*candidatePath, 0, 4) + for _, path := range ds.possibleAccessPaths { + // if we already know the range of the scan is empty, just return a TableDual + if len(path.ranges) == 0 && !ds.ctx.GetSessionVars().StmtCtx.UseCache { + return []*candidatePath{{path: path}} + } + var currentCandidate *candidatePath + if path.isTablePath { + currentCandidate = ds.getTableCandidate(path, prop) + } else if len(path.accessConds) > 0 || !prop.IsEmpty() || path.forced { + // We will use index to generate physical plan if: + // this path's access cond is not nil or + // we have prop to match or + // this index is forced to choose. + currentCandidate = ds.getIndexCandidate(path, prop) + } else { + continue + } + pruned := false + for i := len(candidates) - 1; i >= 0; i-- { + result := compareCandidates(candidates[i], currentCandidate) + if result == 1 { + pruned = true + // We can break here because the current candidate cannot prune others anymore. + break + } else if result == -1 { + candidates = append(candidates[:i], candidates[i+1:]...)
+ } + } + if !pruned { + candidates = append(candidates, currentCandidate) + } + } + return candidates +} + // findBestTask implements the PhysicalPlan interface. // It will enumerate all the available indices and choose a plan with least cost. func (ds *DataSource) findBestTask(prop *property.PhysicalProperty) (t task, err error) { @@ -252,7 +381,9 @@ func (ds *DataSource) findBestTask(prop *property.PhysicalProperty) (t task, err t = invalidTask - for _, path := range ds.possibleAccessPaths { + candidates := ds.skylinePruning(prop) + for _, candidate := range candidates { + path := candidate.path // if we already know the range of the scan is empty, just return a TableDual if len(path.ranges) == 0 && !ds.ctx.GetSessionVars().StmtCtx.UseCache { dual := PhysicalTableDual{}.init(ds.ctx, ds.stats) @@ -262,7 +393,7 @@ func (ds *DataSource) findBestTask(prop *property.PhysicalProperty) (t task, err }, nil } if path.isTablePath { - tblTask, err := ds.convertToTableScan(prop, path) + tblTask, err := ds.convertToTableScan(prop, candidate) if err != nil { return nil, errors.Trace(err) } @@ -271,18 +402,12 @@ func (ds *DataSource) findBestTask(prop *property.PhysicalProperty) (t task, err } continue } - // We will use index to generate physical plan if: - // this path's access cond is not nil or - // we have prop to match or - // this index is forced to choose. - if len(path.accessConds) > 0 || len(prop.Cols) > 0 || path.forced { - idxTask, err := ds.convertToIndexScan(prop, path) - if err != nil { - return nil, errors.Trace(err) - } - if idxTask.cost() < t.cost() { - t = idxTask - } + idxTask, err := ds.convertToIndexScan(prop, candidate) + if err != nil { + return nil, errors.Trace(err) + } + if idxTask.cost() < t.cost() { + t = idxTask } } return @@ -324,7 +449,8 @@ func (ts *PhysicalTableScan) appendExtraHandleCol(ds *DataSource) { } // convertToIndexScan converts the DataSource to index scan with idx. 
-func (ds *DataSource) convertToIndexScan(prop *property.PhysicalProperty, path *accessPath) (task task, err error) { +func (ds *DataSource) convertToIndexScan(prop *property.PhysicalProperty, candidate *candidatePath) (task task, err error) { + path := candidate.path idx := path.index is := PhysicalIndexScan{ Table: ds.tableInfo, @@ -347,7 +473,7 @@ func (ds *DataSource) convertToIndexScan(prop *property.PhysicalProperty, path * } rowCount := path.countAfterAccess cop := &copTask{indexPlan: is} - if !isCoveringIndex(ds.schema.Columns, is.Index.Columns, is.Table.PKIsHandle) { + if !candidate.isSingleScan { // If it's parent requires single read task, return max cost. if prop.TaskTp == property.CopSingleReadTaskType { return invalidTask, nil @@ -366,23 +492,10 @@ func (ds *DataSource) convertToIndexScan(prop *property.PhysicalProperty, path * return invalidTask, nil } is.initSchema(ds.id, idx, cop.tablePlan != nil) - // Check if this plan matches the property. - matchProperty := false - if !prop.IsEmpty() { - for i, col := range idx.Columns { - // not matched - if col.Name.L == prop.Cols[0].ColName.L { - matchProperty = matchIndicesProp(idx.Columns[i:], prop.Cols) - break - } else if i >= path.eqCondCount { - break - } - } - } // Only use expectedCnt when it's smaller than the count we calculated. // e.g. IndexScan(count1)->After Filter(count2). The `ds.stats.RowCount` is count2. count1 is the one we need to calculate // If expectedCnt and count2 are both zero and we go into the below `if` block, the count1 will be set to zero though it's shouldn't be. 
- if (matchProperty || prop.IsEmpty()) && prop.ExpectedCnt < ds.stats.RowCount { + if (candidate.isMatchProp || prop.IsEmpty()) && prop.ExpectedCnt < ds.stats.RowCount { selectivity := ds.stats.RowCount / path.countAfterAccess rowCount = math.Min(prop.ExpectedCnt/selectivity, rowCount) } @@ -390,7 +503,7 @@ func (ds *DataSource) convertToIndexScan(prop *property.PhysicalProperty, path * is.stats.UsePseudoStats = ds.statisticTable.Pseudo cop.cst = rowCount * scanFactor task = cop - if matchProperty { + if candidate.isMatchProp { if prop.Desc { is.Desc = true cop.cst = rowCount * descScanFactor @@ -500,7 +613,7 @@ func splitIndexFilterConditions(conditions []expression.Expression, indexColumns } // convertToTableScan converts the DataSource to table scan. -func (ds *DataSource) convertToTableScan(prop *property.PhysicalProperty, path *accessPath) (task task, err error) { +func (ds *DataSource) convertToTableScan(prop *property.PhysicalProperty, candidate *candidatePath) (task task, err error) { // It will be handled in convertToIndexScan. 
if prop.TaskTp == property.CopDoubleReadTaskType { return invalidTask, nil @@ -515,15 +628,14 @@ func (ds *DataSource) convertToTableScan(prop *property.PhysicalProperty, path * physicalTableID: ds.physicalTableID, }.init(ds.ctx) ts.SetSchema(ds.schema) - var pkCol *expression.Column if ts.Table.PKIsHandle { if pkColInfo := ts.Table.GetPkColInfo(); pkColInfo != nil { - pkCol = expression.ColInfo2Col(ts.schema.Columns, pkColInfo) if ds.statisticTable.Columns[pkColInfo.ID] != nil { ts.Hist = &ds.statisticTable.Columns[pkColInfo.ID].Histogram } } } + path := candidate.path ts.Ranges = path.ranges ts.AccessCondition, ts.filterCondition = path.accessConds, path.tableFilters rowCount := path.countAfterAccess @@ -532,18 +644,17 @@ func (ds *DataSource) convertToTableScan(prop *property.PhysicalProperty, path * indexPlanFinished: true, } task = copTask - matchProperty := len(prop.Cols) == 1 && pkCol != nil && prop.Cols[0].Equal(nil, pkCol) // Only use expectedCnt when it's smaller than the count we calculated. // e.g. IndexScan(count1)->After Filter(count2). The `ds.stats.RowCount` is count2. count1 is the one we need to calculate // If expectedCnt and count2 are both zero and we go into the below `if` block, the count1 will be set to zero though it's shouldn't be. 
- if (matchProperty || prop.IsEmpty()) && prop.ExpectedCnt < ds.stats.RowCount { + if (candidate.isMatchProp || prop.IsEmpty()) && prop.ExpectedCnt < ds.stats.RowCount { selectivity := ds.stats.RowCount / rowCount rowCount = math.Min(prop.ExpectedCnt/selectivity, rowCount) } ts.stats = property.NewSimpleStats(rowCount) ts.stats.UsePseudoStats = ds.statisticTable.Pseudo copTask.cst = rowCount * scanFactor - if matchProperty { + if candidate.isMatchProp { if prop.Desc { ts.Desc = true copTask.cst = rowCount * descScanFactor diff --git a/planner/core/logical_plan_test.go b/planner/core/logical_plan_test.go index 98a8aa1ffb939..a834373401012 100644 --- a/planner/core/logical_plan_test.go +++ b/planner/core/logical_plan_test.go @@ -16,6 +16,7 @@ package core import ( "fmt" "sort" + "strings" "testing" . "github.com/pingcap/check" @@ -27,6 +28,7 @@ import ( "github.com/pingcap/tidb/domain" "github.com/pingcap/tidb/expression" "github.com/pingcap/tidb/infoschema" + "github.com/pingcap/tidb/planner/property" "github.com/pingcap/tidb/sessionctx" "github.com/pingcap/tidb/types" "github.com/pingcap/tidb/util/mock" @@ -2013,3 +2015,101 @@ func (s *testPlanSuite) TestNameResolver(c *C) { } } } + +func byItemsToProperty(byItems []*ByItems) *property.PhysicalProperty { + pp := &property.PhysicalProperty{} + for _, item := range byItems { + pp.Cols = append(pp.Cols, item.Expr.(*expression.Column)) + } + return pp +} + +func pathsName(paths []*candidatePath) string { + var names []string + for _, path := range paths { + if path.path.isTablePath { + names = append(names, "PRIMARY_KEY") + } else { + names = append(names, path.path.index.Name.O) + } + } + return strings.Join(names, ",") +} + +func (s *testPlanSuite) TestSkylinePruning(c *C) { + defer testleak.AfterTest(c)() + tests := []struct { + sql string + result string + }{ + { + sql: "select * from t", + result: "PRIMARY_KEY", + }, + { + sql: "select * from t order by f", + result: "PRIMARY_KEY,f,f_g", + }, + { + sql: "select * 
from t where a > 1", + result: "PRIMARY_KEY", + }, + { + sql: "select * from t where a > 1 order by f", + result: "PRIMARY_KEY,f,f_g", + }, + { + sql: "select * from t where f > 1", + result: "PRIMARY_KEY,f,f_g", + }, + { + sql: "select f from t where f > 1", + result: "f,f_g", + }, + { + sql: "select f from t where f > 1 order by a", + result: "PRIMARY_KEY,f,f_g", + }, + { + sql: "select * from t where f > 1 and g > 1", + result: "PRIMARY_KEY,f,g,f_g", + }, + } + for i, tt := range tests { + comment := Commentf("case:%v sql:%s", i, tt.sql) + stmt, err := s.ParseOneStmt(tt.sql, "", "") + c.Assert(err, IsNil, comment) + Preprocess(s.ctx, stmt, s.is, false) + builder := &planBuilder{ + ctx: mockContext(), + is: s.is, + colMapper: make(map[*ast.ColumnNameExpr]int), + } + p, err := builder.build(stmt) + if err != nil { + c.Assert(err.Error(), Equals, tt.result, comment) + continue + } + c.Assert(err, IsNil) + p, err = logicalOptimize(builder.optFlag, p.(LogicalPlan)) + c.Assert(err, IsNil) + lp := p.(LogicalPlan) + _, err = lp.deriveStats() + c.Assert(err, IsNil) + var ds *DataSource + var byItems []*ByItems + for ds == nil { + switch v := lp.(type) { + case *DataSource: + ds = v + case *LogicalSort: + byItems = v.ByItems + lp = lp.Children()[0] + default: + lp = lp.Children()[0] + } + } + paths := ds.skylinePruning(byItemsToProperty(byItems)) + c.Assert(pathsName(paths), Equals, tt.result) + } +} diff --git a/statistics/selectivity_test.go b/statistics/selectivity_test.go index ed98c6520f8c2..c69e52318cec6 100644 --- a/statistics/selectivity_test.go +++ b/statistics/selectivity_test.go @@ -225,8 +225,8 @@ func (s *testSelectivitySuite) TestDiscreteDistribution(c *C) { } testKit.MustExec("analyze table t") testKit.MustQuery("explain select * from t where a = 'tw' and b < 0").Check(testkit.Rows( - "IndexReader_9 0.00 root index:IndexScan_8", - "└─IndexScan_8 0.00 cop table:t, index:a, b, range:[\"tw\" -inf,\"tw\" 0), keep order:false")) + "IndexReader_6 0.00 root 
index:IndexScan_5", + "└─IndexScan_5 0.00 cop table:t, index:a, b, range:[\"tw\" -inf,\"tw\" 0), keep order:false")) } func (s *testSelectivitySuite) TestSelectCombinedLowBound(c *C) { @@ -237,8 +237,8 @@ func (s *testSelectivitySuite) TestSelectCombinedLowBound(c *C) { testKit.MustExec("insert into t (kid, pid) values (1,2), (1,3), (1,4),(1, 11), (1, 12), (1, 13), (1, 14), (2, 2), (2, 3), (2, 4)") testKit.MustExec("analyze table t") testKit.MustQuery("explain select * from t where kid = 1").Check(testkit.Rows( - "IndexReader_9 7.00 root index:IndexScan_8", - "└─IndexScan_8 7.00 cop table:t, index:kid, pid, range:[1,1], keep order:false")) + "IndexReader_6 7.00 root index:IndexScan_5", + "└─IndexScan_5 7.00 cop table:t, index:kid, pid, range:[1,1], keep order:false")) } func getRange(start, end int64) []*ranger.Range {