Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bugfix(optimizer): Fix error of EXISTS subquery #6073

Merged
merged 1 commit into from
Jun 20, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 16 additions & 7 deletions query/src/sql/exec/data_schema_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,14 +108,23 @@ impl DataSchemaBuilder {
for field in left.fields().iter() {
fields.push(field.clone());
}
if join_type == &JoinType::Left {
for field in right.fields().iter() {
let nullable_field = DataField::new(field.name(), wrap_nullable(field.data_type()));
fields.push(nullable_field);
match join_type {
JoinType::Left => {
for field in right.fields().iter() {
let nullable_field =
DataField::new(field.name(), wrap_nullable(field.data_type()));
fields.push(nullable_field);
}
}
} else {
for field in right.fields().iter() {
fields.push(field.clone());

JoinType::Semi | JoinType::Anti => {
// Empty right output schema for Semi and Anti join
}

_ => {
for field in right.fields().iter() {
fields.push(field.clone());
}
}
}
DataSchemaRefExt::create(fields)
Expand Down
53 changes: 42 additions & 11 deletions query/src/sql/optimizer/heuristic/decorrelate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,14 @@ use common_exception::Result;
use crate::sql::binder::wrap_cast_if_needed;
use crate::sql::binder::JoinCondition;
use crate::sql::optimizer::heuristic::subquery_rewriter::SubqueryRewriter;
use crate::sql::optimizer::ColumnSet;
use crate::sql::optimizer::RelExpr;
use crate::sql::optimizer::SExpr;
use crate::sql::plans::Filter;
use crate::sql::plans::JoinType;
use crate::sql::plans::LogicalInnerJoin;
use crate::sql::plans::PatternPlan;
use crate::sql::plans::Project;
use crate::sql::plans::RelOp;
use crate::sql::plans::SubqueryExpr;
use crate::sql::plans::SubqueryType;
Expand Down Expand Up @@ -119,7 +121,8 @@ pub fn try_decorrelate_subquery(input: &SExpr, subquery: &SubqueryExpr) -> Resul
// This is not necessary, but it is a good heuristic for most cases.
let mut left_conditions = vec![];
let mut right_conditions = vec![];
let mut extra_predicates = vec![];
let mut left_filters = vec![];
let mut right_filters = vec![];
for pred in filter.predicates.iter() {
let join_condition = JoinCondition::new(pred, &input_prop, &filter_prop);
match join_condition {
Expand All @@ -128,8 +131,11 @@ pub fn try_decorrelate_subquery(input: &SExpr, subquery: &SubqueryExpr) -> Resul
return Ok(None);
}

JoinCondition::Left(_) | JoinCondition::Right(_) => {
extra_predicates.push(pred.clone());
JoinCondition::Left(filter) => {
left_filters.push(filter.clone());
}
JoinCondition::Right(filter) => {
right_filters.push(filter.clone());
}

JoinCondition::Both { left, right } => {
Expand All @@ -155,27 +161,52 @@ pub fn try_decorrelate_subquery(input: &SExpr, subquery: &SubqueryExpr) -> Resul
};

// Rewrite plan to semi-join.
let left_child = input.clone();
let mut left_child = input.clone();
if !left_filters.is_empty() {
left_child = SExpr::create_unary(
Filter {
predicates: left_filters,
is_having: false,
}
.into(),
left_child,
);
}

// Remove `Filter` from subquery.
let right_child = SExpr::create_unary(
let mut right_child = SExpr::create_unary(
subquery.subquery.plan().clone(),
SExpr::create_unary(
subquery.subquery.child(0)?.plan().clone(),
SExpr::create_leaf(filter_tree.child(0)?.plan().clone()),
),
);
let mut result = SExpr::create_binary(semi_join.into(), left_child, right_child);

if !extra_predicates.is_empty() {
result = SExpr::create_unary(
if !right_filters.is_empty() {
right_child = SExpr::create_unary(
Filter {
predicates: extra_predicates,
predicates: right_filters,
is_having: false,
}
.into(),
result,
right_child,
);
}
// Add project for join keys
let used_columns = semi_join
.right_conditions
.iter()
.fold(ColumnSet::new(), |v, acc| {
v.union(&acc.used_columns()).cloned().collect()
});
right_child = SExpr::create_unary(
Project {
columns: used_columns,
}
.into(),
right_child,
);

let result = SExpr::create_binary(semi_join.into(), left_child, right_child);

Ok(Some(result))
}
13 changes: 12 additions & 1 deletion query/src/sql/planner/metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::fmt::Debug;
use std::sync::Arc;

use common_ast::ast::Expr;
Expand All @@ -37,6 +38,16 @@ pub struct TableEntry {
pub source: ReadDataSourcePlan,
}

impl Debug for TableEntry {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
"TableEntry {{ index: {:?}, name: {:?}, catalog: {:?}, database: {:?} }}",
self.index, self.name, self.catalog, self.database
)
}
}

impl TableEntry {
pub fn new(
index: IndexType,
Expand Down Expand Up @@ -86,7 +97,7 @@ impl ColumnEntry {
/// Metadata stores information about columns and tables used in a query.
/// Tables and columns are identified with its unique index, notice that index value of a column can
/// be same with that of a table.
#[derive(Clone, Default)]
#[derive(Clone, Debug, Default)]
pub struct Metadata {
tables: Vec<TableEntry>,
columns: Vec<ColumnEntry>,
Expand Down
24 changes: 24 additions & 0 deletions query/tests/it/sql/optimizer/heuristic/subquery.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,30 @@ pub async fn test_heuristic_optimizer_subquery() -> Result<()> {
.to_string(),
rules: DEFAULT_REWRITE_RULES.clone(),
},
Suite {
comment: "".to_string(),
query: "select * from numbers(1) as t where exists (select number as a from numbers(1) where number = t.number)"
.to_string(),
rules: DEFAULT_REWRITE_RULES.clone(),
},
Suite {
comment: "# Exists with different kinds of predicate".to_string(),
query: "select t.number from numbers(1) as t where exists (select * from numbers(1) where number = t.number and number = 0 and t.number < 10)"
.to_string(),
rules: DEFAULT_REWRITE_RULES.clone(),
},
Suite {
comment: "# Exists with non-equi predicate".to_string(),
query: "select t.number from numbers(1) as t where exists (select * from numbers(1) where number = t.number and t.number < number)"
.to_string(),
rules: DEFAULT_REWRITE_RULES.clone(),
},
Suite {
comment: "# Exists project required columns".to_string(),
query: "select t.number from numbers(1) as t where exists (select number as a, number as b, number as c from numbers(1) where number = t.number)"
.to_string(),
rules: DEFAULT_REWRITE_RULES.clone(),
},
];

run_suites(ctx, &mut file, &suites).await
Expand Down
46 changes: 46 additions & 0 deletions query/tests/it/sql/optimizer/heuristic/testdata/subquery.test
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,49 @@ Project: [number]
Scan: default.system.numbers


select * from numbers(1) as t where exists (select number as a from numbers(1) where number = t.number)
----
Project: [number]
HashJoin: SEMI, build keys: [number], probe keys: [number]
Scan: default.system.numbers
Project: [number]
Scan: default.system.numbers


# Exists with different kinds of predicate
select t.number from numbers(1) as t where exists (select * from numbers(1) where number = t.number and number = 0 and t.number < 10)
----
Project: [number]
HashJoin: SEMI, build keys: [number], probe keys: [number]
Filter: [number < 10]
Scan: default.system.numbers
Project: [number]
Filter: [number = 0]
Scan: default.system.numbers


# Exists with non-equi predicate
select t.number from numbers(1) as t where exists (select * from numbers(1) where number = t.number and t.number < number)
----
Project: [number]
Filter: [subquery_3]
CrossApply
Scan: default.system.numbers
Project: [subquery]
EvalScalar: [count(*) > 0]
Aggregate: group items: [], aggregate functions: [count(*)]
Project: [number]
Filter: [number = number, number < number]
Scan: default.system.numbers


# Exists project required columns
select t.number from numbers(1) as t where exists (select number as a, number as b, number as c from numbers(1) where number = t.number)
----
Project: [number]
HashJoin: SEMI, build keys: [number], probe keys: [number]
Scan: default.system.numbers
Project: [number]
Scan: default.system.numbers


3 changes: 3 additions & 0 deletions tests/suites/0_stateless/20+_others/20_0001_planner_v2.result
Original file line number Diff line number Diff line change
Expand Up @@ -387,3 +387,6 @@ NULL NULL NULL 1
2
3
4
1
0
1
7 changes: 7 additions & 0 deletions tests/suites/0_stateless/20+_others/20_0001_planner_v2.sql
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,10 @@ CREATE FUNCTION notnull1 AS (p) -> not(is_null(p));
SELECT notnull1(null);
SELECT notnull1('null');

drop function a_plus_3;
drop function cal1;
drop function notnull1;

--set operator
select '====Intersect Distinct===';
create table t1(a int, b int);
Expand Down Expand Up @@ -339,5 +343,8 @@ drop table n;
select * from numbers(5) as t where exists (select * from numbers(3) where number = t.number);
select * from numbers(5) as t where not exists (select * from numbers(3) where number = t.number);

select * from numbers(5) as t where exists (select number as a from numbers(3) where number = t.number and number > 0 and t.number < 2);
select * from numbers(5) as t where exists (select * from numbers(3) where number > t.number);

set enable_planner_v2 = 0;