From 7f3ec69acd71536694484a6f23c9e096bb47c687 Mon Sep 17 00:00:00 2001 From: Jk Xu <54522439+Dousir9@users.noreply.github.com> Date: Sat, 30 Nov 2024 12:31:47 +0800 Subject: [PATCH] chore(planner): improve physical join (#16962) * chore(planner): improve physical join * chore(test): add sqllogictest * chore(code): refine comments. * chore(test): update sqllogictest * chore(test): test iceberg * chore(test): test iceberg * chore(planner): fix join type * chore(test): update sqllogictest * chore(test): update sqllogictest --------- Co-authored-by: Bohu --- .../executor/physical_plans/physical_join.rs | 17 ++++++- .../suites/mode/standalone/explain/join.test | 44 +++++++++++++++++++ .../mode/standalone/explain_native/join.test | 44 +++++++++++++++++++ .../sqllogictests/suites/tpch/join_order.test | 12 ++--- 4 files changed, 109 insertions(+), 8 deletions(-) diff --git a/src/query/sql/src/executor/physical_plans/physical_join.rs b/src/query/sql/src/executor/physical_plans/physical_join.rs index 9ef68c7a5301..f8676ca91e36 100644 --- a/src/query/sql/src/executor/physical_plans/physical_join.rs +++ b/src/query/sql/src/executor/physical_plans/physical_join.rs @@ -46,8 +46,21 @@ pub fn physical_join(join: &Join, s_expr: &SExpr) -> Result { return Ok(PhysicalJoinType::Hash); } - let left_prop = RelExpr::with_s_expr(s_expr.child(1)?).derive_relational_prop()?; - let right_prop = RelExpr::with_s_expr(s_expr.child(0)?).derive_relational_prop()?; + let left_rel_expr = RelExpr::with_s_expr(s_expr.child(0)?); + let right_rel_expr = RelExpr::with_s_expr(s_expr.child(1)?); + if matches!( + right_rel_expr + .derive_cardinality()? + .statistics + .precise_cardinality, + Some(1) + ) { + // If the build side outputs exactly one row, we use CROSS JOIN + FILTER instead of MERGE JOIN.
+ return Ok(PhysicalJoinType::Hash); + } + + let left_prop = left_rel_expr.derive_relational_prop()?; + let right_prop = right_rel_expr.derive_relational_prop()?; let mut range_conditions = vec![]; let mut other_conditions = vec![]; for condition in join.non_equi_conditions.iter() { diff --git a/tests/sqllogictests/suites/mode/standalone/explain/join.test b/tests/sqllogictests/suites/mode/standalone/explain/join.test index 924a5e633db7..7e3386512f56 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/join.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/join.test @@ -940,3 +940,47 @@ EvalScalar statement ok drop table t1; + +statement ok +CREATE OR REPLACE TABLE t1(a INT); + +statement ok +CREATE OR REPLACE TABLE t2(a INT); + +statement ok +INSERT INTO t1 VALUES(1), (2), (3), (4); + +statement ok +INSERT INTO t2 VALUES(1); + +query T +EXPLAIN SELECT * FROM t1 WHERE a >= (SELECT MAX(a) FROM t2); +---- +HashJoin +├── output columns: [t1.a (#0)] +├── join type: INNER +├── build keys: [] +├── probe keys: [] +├── filters: [t1.a (#0) >= CAST(scalar_subquery_2 (#2) AS Int32 NULL)] +├── estimated rows: 4.00 +├── EvalScalar(Build) +│ ├── output columns: [MAX(a) (#2)] +│ ├── expressions: [1] +│ ├── estimated rows: 1.00 +│ └── DummyTableScan +└── TableScan(Probe) + ├── table: default.default.t1 + ├── output columns: [a (#0)] + ├── read rows: 4 + ├── read size: < 1 KiB + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [], limit: NONE] + └── estimated rows: 4.00 + +statement ok +DROP TABLE t1; + +statement ok +DROP TABLE t2; diff --git a/tests/sqllogictests/suites/mode/standalone/explain_native/join.test b/tests/sqllogictests/suites/mode/standalone/explain_native/join.test index 223721f153ff..8cd00b0d7dc8 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain_native/join.test +++ b/tests/sqllogictests/suites/mode/standalone/explain_native/join.test @@ -634,3 +634,47 @@ 
set enable_cbo = 1 statement ok drop table t1 + +statement ok +CREATE OR REPLACE TABLE t1(a INT); + +statement ok +CREATE OR REPLACE TABLE t2(a INT); + +statement ok +INSERT INTO t1 VALUES(1), (2), (3), (4); + +statement ok +INSERT INTO t2 VALUES(1); + +query T +EXPLAIN SELECT * FROM t1 WHERE a >= (SELECT MAX(a) FROM t2); +---- +HashJoin +├── output columns: [t1.a (#0)] +├── join type: INNER +├── build keys: [] +├── probe keys: [] +├── filters: [t1.a (#0) >= CAST(scalar_subquery_2 (#2) AS Int32 NULL)] +├── estimated rows: 4.00 +├── EvalScalar(Build) +│ ├── output columns: [MAX(a) (#2)] +│ ├── expressions: [1] +│ ├── estimated rows: 1.00 +│ └── DummyTableScan +└── TableScan(Probe) + ├── table: default.default.t1 + ├── output columns: [a (#0)] + ├── read rows: 4 + ├── read size: < 1 KiB + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [], limit: NONE] + └── estimated rows: 4.00 + +statement ok +DROP TABLE t1; + +statement ok +DROP TABLE t2; diff --git a/tests/sqllogictests/suites/tpch/join_order.test b/tests/sqllogictests/suites/tpch/join_order.test index 44669fc4e3bb..260dc8daf55b 100644 --- a/tests/sqllogictests/suites/tpch/join_order.test +++ b/tests/sqllogictests/suites/tpch/join_order.test @@ -532,8 +532,8 @@ group by order by value desc limit 100; ---- -RangeJoin: INNER -├── Left +HashJoin: INNER +├── Build │ └── HashJoin: INNER │ ├── Build │ │ └── HashJoin: INNER @@ -543,7 +543,7 @@ RangeJoin: INNER │ │ └── Scan: default.tpch_test.supplier (#4) (read rows: 1000) │ └── Probe │ └── Scan: default.tpch_test.partsupp (#3) (read rows: 80000) -└── Right +└── Probe └── HashJoin: INNER ├── Build │ └── HashJoin: INNER @@ -1104,10 +1104,10 @@ order by ---- HashJoin: RIGHT ANTI ├── Build -│ └── RangeJoin: INNER -│ ├── Left +│ └── HashJoin: INNER +│ ├── Build │ │ └── Scan: default.tpch_test.customer (#1) (read rows: 15000) -│ └── Right +│ └── Probe │ └── Scan: default.tpch_test.customer (#0) (read 
rows: 15000) └── Probe └── Scan: default.tpch_test.orders (#2) (read rows: 150000)