-
Notifications
You must be signed in to change notification settings - Fork 49
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(optimizer): Implement LIKE expression rule for query optimization #96
base: main
Are you sure you want to change the base?
Changes from all commits
0e95ab4
e999789
5ce7a82
696232c
5b1d4ed
74e622c
082b635
c699ce3
b3c9992
143f128
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,11 +1,21 @@ | ||
use crate::expression::{BinaryOperator, ScalarExpression}; | ||
use crate::optimizer::core::pattern::{Pattern, PatternChildrenPredicate}; | ||
use crate::optimizer::core::rule::Rule; | ||
use crate::optimizer::heuristic::graph::{HepGraph, HepNodeId}; | ||
use crate::optimizer::OptimizerError; | ||
use crate::planner::operator::filter::FilterOperator; | ||
use crate::planner::operator::join::JoinCondition; | ||
use crate::planner::operator::Operator; | ||
use crate::types::value::{DataValue, ValueRef}; | ||
use crate::types::LogicalType; | ||
use lazy_static::lazy_static; | ||
lazy_static! { | ||
static ref LIKE_REWRITE_RULE: Pattern = { | ||
Pattern { | ||
predicate: |op| matches!(op, Operator::Filter(_)), | ||
children: PatternChildrenPredicate::None, | ||
} | ||
}; | ||
static ref CONSTANT_CALCULATION_RULE: Pattern = { | ||
Pattern { | ||
predicate: |_| true, | ||
|
@@ -109,6 +119,99 @@ impl Rule for SimplifyFilter { | |
} | ||
} | ||
|
||
pub struct LikeRewrite; | ||
|
||
impl Rule for LikeRewrite { | ||
fn pattern(&self) -> &Pattern { | ||
&LIKE_REWRITE_RULE | ||
} | ||
|
||
fn apply(&self, node_id: HepNodeId, graph: &mut HepGraph) -> Result<(), OptimizerError> { | ||
if let Operator::Filter(mut filter_op) = graph.operator(node_id).clone() { | ||
if let ScalarExpression::Binary { | ||
op: BinaryOperator::Like, | ||
ref mut left_expr, | ||
ref mut right_expr, | ||
ty, | ||
} = filter_op.predicate.clone() | ||
{ | ||
if let ScalarExpression::Constant(value) = right_expr.as_ref() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Instead of pattern matching layer by layer again |
||
if let DataValue::Utf8(value_str) = (**value).clone() { | ||
Self::process_value_str(value_str, left_expr, ty, &mut filter_op); | ||
} | ||
} | ||
} | ||
graph.replace_node(node_id, Operator::Filter(filter_op)) | ||
} | ||
Ok(()) | ||
} | ||
} | ||
|
||
impl LikeRewrite { | ||
fn process_value_str( | ||
value_str: Option<String>, | ||
left_expr: &mut Box<ScalarExpression>, | ||
ty: LogicalType, | ||
filter_op: &mut FilterOperator, | ||
) { | ||
value_str.map(|value_str| { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Method encapsulation of little significance, just to collapse the nesting |
||
if value_str.ends_with('%') { | ||
let left_bound = value_str.trim_end_matches('%'); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if let Some(right_bound) = increment_last_char(left_bound) {
filter_op.predicate = Self::create_new_expr(
&mut left_expr.clone(),
ty,
left_bound.to_string(),
right_bound,
);
} |
||
let right_bound = increment_last_char(left_bound); | ||
right_bound.map(|rb| { | ||
filter_op.predicate = Self::create_new_expr( | ||
&mut left_expr.clone(), | ||
ty, | ||
left_bound.to_string(), | ||
rb, | ||
); | ||
}); | ||
} | ||
}); | ||
} | ||
|
||
fn create_new_expr( | ||
left_expr: &mut Box<ScalarExpression>, | ||
ty: LogicalType, | ||
left_bound: String, | ||
right_bound: String, | ||
) -> ScalarExpression { | ||
let new_expr = ScalarExpression::Binary { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Meaningless variable declaration -> new_expr |
||
op: BinaryOperator::And, | ||
left_expr: Box::new(ScalarExpression::Binary { | ||
op: BinaryOperator::GtEq, | ||
left_expr: left_expr.clone(), | ||
right_expr: Box::new(ScalarExpression::Constant(ValueRef::from(DataValue::Utf8( | ||
Some(left_bound), | ||
)))), | ||
ty, | ||
}), | ||
|
||
right_expr: Box::new(ScalarExpression::Binary { | ||
op: BinaryOperator::Lt, | ||
left_expr: left_expr.clone(), | ||
right_expr: Box::new(ScalarExpression::Constant(ValueRef::from(DataValue::Utf8( | ||
Some(right_bound), | ||
)))), | ||
ty, | ||
}), | ||
ty, | ||
}; | ||
new_expr | ||
} | ||
} | ||
|
||
/// Returns the exclusive upper bound of the set of strings that start with `s`,
/// comparing by Unicode scalar value.
///
/// The last character is replaced by its successor. When the last character is
/// `char::MAX` it has no successor, so it is dropped and the carry moves to the
/// preceding character; the dropped tail is not kept, otherwise the bound would
/// admit strings that do not start with `s`.
///
/// Returns `None` when no bound exists: `s` is empty or consists only of
/// `char::MAX`.
fn increment_last_char(s: &str) -> Option<String> {
    let mut chars: Vec<char> = s.chars().collect();
    while let Some(last) = chars.pop() {
        let successor = match last {
            // U+D800..=U+DFFF are surrogates and not valid `char`s; skip over the gap.
            '\u{D7FF}' => Some('\u{E000}'),
            // Only `char::MAX + 1` is rejected here, which triggers the carry.
            other => std::char::from_u32(other as u32 + 1),
        };
        if let Some(next_char) = successor {
            chars.push(next_char);
            return Some(chars.into_iter().collect());
        }
    }
    None
}
|
||
#[cfg(test)] | ||
mod test { | ||
use crate::binder::test::select_sql_run; | ||
|
@@ -118,6 +221,7 @@ mod test { | |
use crate::expression::{BinaryOperator, ScalarExpression, UnaryOperator}; | ||
use crate::optimizer::heuristic::batch::HepBatchStrategy; | ||
use crate::optimizer::heuristic::optimizer::HepOptimizer; | ||
use crate::optimizer::rule::simplification::increment_last_char; | ||
use crate::optimizer::rule::RuleImpl; | ||
use crate::planner::operator::filter::FilterOperator; | ||
use crate::planner::operator::Operator; | ||
|
@@ -127,6 +231,13 @@ mod test { | |
use std::collections::Bound; | ||
use std::sync::Arc; | ||
|
||
#[test] | ||
fn test_increment_char() { | ||
assert_eq!(increment_last_char("abc"), Some("abd".to_string())); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There are too few test cases, and the carry case is not tested. |
||
assert_eq!(increment_last_char("abz"), Some("ab{".to_string())); | ||
assert_eq!(increment_last_char("ab}"), Some("ab~".to_string())); | ||
} | ||
|
||
#[tokio::test] | ||
async fn test_constant_calculation_omitted() -> Result<(), DatabaseError> { | ||
// (2 + (-1)) < -(c1 + 1) | ||
|
@@ -343,7 +454,7 @@ mod test { | |
cb_1_c1, | ||
Some(ConstantBinary::Scope { | ||
min: Bound::Unbounded, | ||
max: Bound::Excluded(Arc::new(DataValue::Int32(Some(-2)))) | ||
max: Bound::Excluded(Arc::new(DataValue::Int32(Some(-2)))), | ||
}) | ||
); | ||
|
||
|
@@ -353,7 +464,7 @@ mod test { | |
cb_1_c2, | ||
Some(ConstantBinary::Scope { | ||
min: Bound::Excluded(Arc::new(DataValue::Int32(Some(2)))), | ||
max: Bound::Unbounded | ||
max: Bound::Unbounded, | ||
}) | ||
); | ||
|
||
|
@@ -363,7 +474,7 @@ mod test { | |
cb_2_c1, | ||
Some(ConstantBinary::Scope { | ||
min: Bound::Excluded(Arc::new(DataValue::Int32(Some(2)))), | ||
max: Bound::Unbounded | ||
max: Bound::Unbounded, | ||
}) | ||
); | ||
|
||
|
@@ -373,7 +484,7 @@ mod test { | |
cb_1_c1, | ||
Some(ConstantBinary::Scope { | ||
min: Bound::Unbounded, | ||
max: Bound::Excluded(Arc::new(DataValue::Int32(Some(-2)))) | ||
max: Bound::Excluded(Arc::new(DataValue::Int32(Some(-2)))), | ||
}) | ||
); | ||
|
||
|
@@ -383,7 +494,7 @@ mod test { | |
cb_3_c1, | ||
Some(ConstantBinary::Scope { | ||
min: Bound::Unbounded, | ||
max: Bound::Excluded(Arc::new(DataValue::Int32(Some(-1)))) | ||
max: Bound::Excluded(Arc::new(DataValue::Int32(Some(-1)))), | ||
}) | ||
); | ||
|
||
|
@@ -393,7 +504,7 @@ mod test { | |
cb_3_c2, | ||
Some(ConstantBinary::Scope { | ||
min: Bound::Excluded(Arc::new(DataValue::Int32(Some(0)))), | ||
max: Bound::Unbounded | ||
max: Bound::Unbounded, | ||
}) | ||
); | ||
|
||
|
@@ -403,7 +514,7 @@ mod test { | |
cb_4_c1, | ||
Some(ConstantBinary::Scope { | ||
min: Bound::Excluded(Arc::new(DataValue::Int32(Some(0)))), | ||
max: Bound::Unbounded | ||
max: Bound::Unbounded, | ||
}) | ||
); | ||
|
||
|
@@ -413,7 +524,7 @@ mod test { | |
cb_4_c2, | ||
Some(ConstantBinary::Scope { | ||
min: Bound::Unbounded, | ||
max: Bound::Excluded(Arc::new(DataValue::Int32(Some(-1)))) | ||
max: Bound::Excluded(Arc::new(DataValue::Int32(Some(-1)))), | ||
}) | ||
); | ||
|
||
|
@@ -450,4 +561,85 @@ mod test { | |
|
||
Ok(()) | ||
} | ||
|
||
#[tokio::test] | ||
async fn test_like_rewrite() -> Result<(), DatabaseError> { | ||
let plan = select_sql_run("select * from t1 where c1 like 'abc%%'").await?; | ||
let best_plan = HepOptimizer::new(plan.clone()) | ||
.batch( | ||
"test_like_rewrite".to_string(), | ||
HepBatchStrategy::once_topdown(), | ||
vec![RuleImpl::LikeRewrite], | ||
) | ||
.find_best()?; | ||
assert_eq!(best_plan.childrens.len(), 1); | ||
|
||
match best_plan.operator { | ||
Operator::Project(op) => { | ||
assert_eq!(op.exprs.len(), 2); | ||
} | ||
_ => unreachable!(), | ||
} | ||
|
||
match &best_plan.childrens[0].operator { | ||
Operator::Filter(op) => { | ||
assert_eq!( | ||
op.predicate, | ||
ScalarExpression::Binary { | ||
op: BinaryOperator::And, | ||
left_expr: Box::new(ScalarExpression::Binary { | ||
op: BinaryOperator::GtEq, | ||
left_expr: Box::new(ScalarExpression::ColumnRef(Arc::new( | ||
ColumnCatalog { | ||
summary: ColumnSummary { | ||
id: Some(0), | ||
name: "c1".to_string(), | ||
}, | ||
nullable: false, | ||
desc: ColumnDesc { | ||
column_datatype: LogicalType::Integer, | ||
is_primary: true, | ||
is_unique: false, | ||
default: None, | ||
}, | ||
ref_expr: None, | ||
} | ||
))), | ||
right_expr: Box::new(ScalarExpression::Constant(Arc::new( | ||
DataValue::Utf8(Some("abc".to_string())) | ||
))), | ||
ty: LogicalType::Boolean, | ||
}), | ||
right_expr: Box::new(ScalarExpression::Binary { | ||
op: BinaryOperator::Lt, | ||
left_expr: Box::new(ScalarExpression::ColumnRef(Arc::new( | ||
ColumnCatalog { | ||
summary: ColumnSummary { | ||
id: Some(0), | ||
name: "c1".to_string(), | ||
}, | ||
nullable: false, | ||
desc: ColumnDesc { | ||
column_datatype: LogicalType::Integer, | ||
is_primary: true, | ||
is_unique: false, | ||
default: None, | ||
}, | ||
ref_expr: None, | ||
} | ||
))), | ||
right_expr: Box::new(ScalarExpression::Constant(Arc::new( | ||
DataValue::Utf8(Some("abd".to_string())) | ||
))), | ||
ty: LogicalType::Boolean, | ||
}), | ||
ty: LogicalType::Boolean, | ||
} | ||
); | ||
} | ||
_ => unreachable!(), | ||
} | ||
|
||
Ok(()) | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Use the `operator_mut` method to modify the operator directly instead of `replace`.