Skip to content

Commit

Permalink
Merge pull request #6073 from leiysky/fix-exists
Browse files Browse the repository at this point in the history
bugfix(optimizer): Fix error of `EXISTS` subquery
  • Loading branch information
leiysky authored Jun 20, 2022
2 parents 372c755 + b6ff233 commit 2236d9a
Show file tree
Hide file tree
Showing 7 changed files with 150 additions and 19 deletions.
23 changes: 16 additions & 7 deletions query/src/sql/exec/data_schema_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,14 +108,23 @@ impl DataSchemaBuilder {
for field in left.fields().iter() {
fields.push(field.clone());
}
if join_type == &JoinType::Left {
for field in right.fields().iter() {
let nullable_field = DataField::new(field.name(), wrap_nullable(field.data_type()));
fields.push(nullable_field);
match join_type {
JoinType::Left => {
for field in right.fields().iter() {
let nullable_field =
DataField::new(field.name(), wrap_nullable(field.data_type()));
fields.push(nullable_field);
}
}
} else {
for field in right.fields().iter() {
fields.push(field.clone());

JoinType::Semi | JoinType::Anti => {
// Empty right output schema for Semi and Anti join
}

_ => {
for field in right.fields().iter() {
fields.push(field.clone());
}
}
}
DataSchemaRefExt::create(fields)
Expand Down
53 changes: 42 additions & 11 deletions query/src/sql/optimizer/heuristic/decorrelate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,14 @@ use common_exception::Result;
use crate::sql::binder::wrap_cast_if_needed;
use crate::sql::binder::JoinCondition;
use crate::sql::optimizer::heuristic::subquery_rewriter::SubqueryRewriter;
use crate::sql::optimizer::ColumnSet;
use crate::sql::optimizer::RelExpr;
use crate::sql::optimizer::SExpr;
use crate::sql::plans::Filter;
use crate::sql::plans::JoinType;
use crate::sql::plans::LogicalInnerJoin;
use crate::sql::plans::PatternPlan;
use crate::sql::plans::Project;
use crate::sql::plans::RelOp;
use crate::sql::plans::SubqueryExpr;
use crate::sql::plans::SubqueryType;
Expand Down Expand Up @@ -119,7 +121,8 @@ pub fn try_decorrelate_subquery(input: &SExpr, subquery: &SubqueryExpr) -> Resul
// This is not necessary, but it is a good heuristic for most cases.
let mut left_conditions = vec![];
let mut right_conditions = vec![];
let mut extra_predicates = vec![];
let mut left_filters = vec![];
let mut right_filters = vec![];
for pred in filter.predicates.iter() {
let join_condition = JoinCondition::new(pred, &input_prop, &filter_prop);
match join_condition {
Expand All @@ -128,8 +131,11 @@ pub fn try_decorrelate_subquery(input: &SExpr, subquery: &SubqueryExpr) -> Resul
return Ok(None);
}

JoinCondition::Left(_) | JoinCondition::Right(_) => {
extra_predicates.push(pred.clone());
JoinCondition::Left(filter) => {
left_filters.push(filter.clone());
}
JoinCondition::Right(filter) => {
right_filters.push(filter.clone());
}

JoinCondition::Both { left, right } => {
Expand All @@ -155,27 +161,52 @@ pub fn try_decorrelate_subquery(input: &SExpr, subquery: &SubqueryExpr) -> Resul
};

// Rewrite plan to semi-join.
let left_child = input.clone();
let mut left_child = input.clone();
if !left_filters.is_empty() {
left_child = SExpr::create_unary(
Filter {
predicates: left_filters,
is_having: false,
}
.into(),
left_child,
);
}

// Remove `Filter` from subquery.
let right_child = SExpr::create_unary(
let mut right_child = SExpr::create_unary(
subquery.subquery.plan().clone(),
SExpr::create_unary(
subquery.subquery.child(0)?.plan().clone(),
SExpr::create_leaf(filter_tree.child(0)?.plan().clone()),
),
);
let mut result = SExpr::create_binary(semi_join.into(), left_child, right_child);

if !extra_predicates.is_empty() {
result = SExpr::create_unary(
if !right_filters.is_empty() {
right_child = SExpr::create_unary(
Filter {
predicates: extra_predicates,
predicates: right_filters,
is_having: false,
}
.into(),
result,
right_child,
);
}
// Add project for join keys
let used_columns = semi_join
.right_conditions
.iter()
.fold(ColumnSet::new(), |v, acc| {
v.union(&acc.used_columns()).cloned().collect()
});
right_child = SExpr::create_unary(
Project {
columns: used_columns,
}
.into(),
right_child,
);

let result = SExpr::create_binary(semi_join.into(), left_child, right_child);

Ok(Some(result))
}
13 changes: 12 additions & 1 deletion query/src/sql/planner/metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::fmt::Debug;
use std::sync::Arc;

use common_ast::ast::Expr;
Expand All @@ -37,6 +38,16 @@ pub struct TableEntry {
pub source: ReadDataSourcePlan,
}

impl Debug for TableEntry {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
"TableEntry {{ index: {:?}, name: {:?}, catalog: {:?}, database: {:?} }}",
self.index, self.name, self.catalog, self.database
)
}
}

impl TableEntry {
pub fn new(
index: IndexType,
Expand Down Expand Up @@ -86,7 +97,7 @@ impl ColumnEntry {
/// Metadata stores information about columns and tables used in a query.
/// Tables and columns are identified with its unique index, notice that index value of a column can
/// be same with that of a table.
#[derive(Clone, Default)]
#[derive(Clone, Debug, Default)]
pub struct Metadata {
tables: Vec<TableEntry>,
columns: Vec<ColumnEntry>,
Expand Down
24 changes: 24 additions & 0 deletions query/tests/it/sql/optimizer/heuristic/subquery.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,30 @@ pub async fn test_heuristic_optimizer_subquery() -> Result<()> {
.to_string(),
rules: DEFAULT_REWRITE_RULES.clone(),
},
Suite {
comment: "".to_string(),
query: "select * from numbers(1) as t where exists (select number as a from numbers(1) where number = t.number)"
.to_string(),
rules: DEFAULT_REWRITE_RULES.clone(),
},
Suite {
comment: "# Exists with different kinds of predicate".to_string(),
query: "select t.number from numbers(1) as t where exists (select * from numbers(1) where number = t.number and number = 0 and t.number < 10)"
.to_string(),
rules: DEFAULT_REWRITE_RULES.clone(),
},
Suite {
comment: "# Exists with non-equi predicate".to_string(),
query: "select t.number from numbers(1) as t where exists (select * from numbers(1) where number = t.number and t.number < number)"
.to_string(),
rules: DEFAULT_REWRITE_RULES.clone(),
},
Suite {
comment: "# Exists project required columns".to_string(),
query: "select t.number from numbers(1) as t where exists (select number as a, number as b, number as c from numbers(1) where number = t.number)"
.to_string(),
rules: DEFAULT_REWRITE_RULES.clone(),
},
];

run_suites(ctx, &mut file, &suites).await
Expand Down
46 changes: 46 additions & 0 deletions query/tests/it/sql/optimizer/heuristic/testdata/subquery.test
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,49 @@ Project: [number]
Scan: default.system.numbers


select * from numbers(1) as t where exists (select number as a from numbers(1) where number = t.number)
----
Project: [number]
HashJoin: SEMI, build keys: [number], probe keys: [number]
Scan: default.system.numbers
Project: [number]
Scan: default.system.numbers


# Exists with different kinds of predicate
select t.number from numbers(1) as t where exists (select * from numbers(1) where number = t.number and number = 0 and t.number < 10)
----
Project: [number]
HashJoin: SEMI, build keys: [number], probe keys: [number]
Filter: [number < 10]
Scan: default.system.numbers
Project: [number]
Filter: [number = 0]
Scan: default.system.numbers


# Exists with non-equi predicate
select t.number from numbers(1) as t where exists (select * from numbers(1) where number = t.number and t.number < number)
----
Project: [number]
Filter: [subquery_3]
CrossApply
Scan: default.system.numbers
Project: [subquery]
EvalScalar: [count(*) > 0]
Aggregate: group items: [], aggregate functions: [count(*)]
Project: [number]
Filter: [number = number, number < number]
Scan: default.system.numbers


# Exists project required columns
select t.number from numbers(1) as t where exists (select number as a, number as b, number as c from numbers(1) where number = t.number)
----
Project: [number]
HashJoin: SEMI, build keys: [number], probe keys: [number]
Scan: default.system.numbers
Project: [number]
Scan: default.system.numbers


3 changes: 3 additions & 0 deletions tests/suites/0_stateless/20+_others/20_0001_planner_v2.result
Original file line number Diff line number Diff line change
Expand Up @@ -387,3 +387,6 @@ NULL NULL NULL 1
2
3
4
1
0
1
7 changes: 7 additions & 0 deletions tests/suites/0_stateless/20+_others/20_0001_planner_v2.sql
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,10 @@ CREATE FUNCTION notnull1 AS (p) -> not(is_null(p));
SELECT notnull1(null);
SELECT notnull1('null');

drop function a_plus_3;
drop function cal1;
drop function notnull1;

--set operator
select '====Intersect Distinct===';
create table t1(a int, b int);
Expand Down Expand Up @@ -339,5 +343,8 @@ drop table n;
select * from numbers(5) as t where exists (select * from numbers(3) where number = t.number);
select * from numbers(5) as t where not exists (select * from numbers(3) where number = t.number);

select * from numbers(5) as t where exists (select number as a from numbers(3) where number = t.number and number > 0 and t.number < 2);
select * from numbers(5) as t where exists (select * from numbers(3) where number > t.number);

set enable_planner_v2 = 0;

0 comments on commit 2236d9a

Please sign in to comment.