Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore(planner): improve physical join #16962

Merged
merged 10 commits into from
Nov 30, 2024
17 changes: 15 additions & 2 deletions src/query/sql/src/executor/physical_plans/physical_join.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,21 @@ pub fn physical_join(join: &Join, s_expr: &SExpr) -> Result<PhysicalJoinType> {
return Ok(PhysicalJoinType::Hash);
}

let left_prop = RelExpr::with_s_expr(s_expr.child(1)?).derive_relational_prop()?;
let right_prop = RelExpr::with_s_expr(s_expr.child(0)?).derive_relational_prop()?;
let left_rel_expr = RelExpr::with_s_expr(s_expr.child(0)?);
let right_rel_expr = RelExpr::with_s_expr(s_expr.child(1)?);
if matches!(
right_rel_expr
.derive_cardinality()?
.statistics
.precise_cardinality,
Some(1)
) {
// If the build side outputs exactly one row, we use CROSS JOIN + FILTER instead of MERGE JOIN.
return Ok(PhysicalJoinType::Hash);
}

let left_prop = left_rel_expr.derive_relational_prop()?;
let right_prop = right_rel_expr.derive_relational_prop()?;
let mut range_conditions = vec![];
let mut other_conditions = vec![];
for condition in join.non_equi_conditions.iter() {
Expand Down
44 changes: 44 additions & 0 deletions tests/sqllogictests/suites/mode/standalone/explain/join.test
Original file line number Diff line number Diff line change
Expand Up @@ -940,3 +940,47 @@ EvalScalar

statement ok
drop table t1;

statement ok
CREATE OR REPLACE TABLE t1(a INT);

statement ok
CREATE OR REPLACE TABLE t2(a INT);

statement ok
INSERT INTO t1 VALUES(1), (2), (3), (4);

statement ok
INSERT INTO t2 VALUES(1);

query T
EXPLAIN SELECT * FROM t1 WHERE a >= (SELECT MAX(a) FROM t2);
----
HashJoin
├── output columns: [t1.a (#0)]
├── join type: INNER
├── build keys: []
├── probe keys: []
├── filters: [t1.a (#0) >= CAST(scalar_subquery_2 (#2) AS Int32 NULL)]
├── estimated rows: 4.00
├── EvalScalar(Build)
│ ├── output columns: [MAX(a) (#2)]
│ ├── expressions: [1]
│ ├── estimated rows: 1.00
│ └── DummyTableScan
└── TableScan(Probe)
├── table: default.default.t1
├── output columns: [a (#0)]
├── read rows: 4
├── read size: < 1 KiB
├── partitions total: 1
├── partitions scanned: 1
├── pruning stats: [segments: <range pruning: 1 to 1>, blocks: <range pruning: 1 to 1>]
├── push downs: [filters: [], limit: NONE]
└── estimated rows: 4.00

statement ok
DROP TABLE t1;

statement ok
DROP TABLE t2;
Original file line number Diff line number Diff line change
Expand Up @@ -634,3 +634,47 @@ set enable_cbo = 1

statement ok
drop table t1

statement ok
CREATE OR REPLACE TABLE t1(a INT);

statement ok
CREATE OR REPLACE TABLE t2(a INT);

statement ok
INSERT INTO t1 VALUES(1), (2), (3), (4);

statement ok
INSERT INTO t2 VALUES(1);

query T
EXPLAIN SELECT * FROM t1 WHERE a >= (SELECT MAX(a) FROM t2);
----
HashJoin
├── output columns: [t1.a (#0)]
├── join type: INNER
├── build keys: []
├── probe keys: []
├── filters: [t1.a (#0) >= CAST(scalar_subquery_2 (#2) AS Int32 NULL)]
├── estimated rows: 4.00
├── EvalScalar(Build)
│ ├── output columns: [MAX(a) (#2)]
│ ├── expressions: [1]
│ ├── estimated rows: 1.00
│ └── DummyTableScan
└── TableScan(Probe)
├── table: default.default.t1
├── output columns: [a (#0)]
├── read rows: 4
├── read size: < 1 KiB
├── partitions total: 1
├── partitions scanned: 1
├── pruning stats: [segments: <range pruning: 1 to 1>, blocks: <range pruning: 1 to 1>]
├── push downs: [filters: [], limit: NONE]
└── estimated rows: 4.00

statement ok
DROP TABLE t1;

statement ok
DROP TABLE t2;
12 changes: 6 additions & 6 deletions tests/sqllogictests/suites/tpch/join_order.test
Original file line number Diff line number Diff line change
Expand Up @@ -532,8 +532,8 @@ group by
order by
value desc limit 100;
----
RangeJoin: INNER
├── Left
HashJoin: INNER
├── Build
│ └── HashJoin: INNER
│ ├── Build
│ │ └── HashJoin: INNER
Expand All @@ -543,7 +543,7 @@ RangeJoin: INNER
│ │ └── Scan: default.tpch_test.supplier (#4) (read rows: 1000)
│ └── Probe
│ └── Scan: default.tpch_test.partsupp (#3) (read rows: 80000)
└── Right
└── Probe
└── HashJoin: INNER
├── Build
│ └── HashJoin: INNER
Expand Down Expand Up @@ -1104,10 +1104,10 @@ order by
----
HashJoin: RIGHT ANTI
├── Build
│ └── RangeJoin: INNER
│ ├── Left
│ └── HashJoin: INNER
│ ├── Build
│ │ └── Scan: default.tpch_test.customer (#1) (read rows: 15000)
│ └── Right
│ └── Probe
│ └── Scan: default.tpch_test.customer (#0) (read rows: 15000)
└── Probe
└── Scan: default.tpch_test.orders (#2) (read rows: 150000)