diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index 9aaf27420f45..2ecd4706a54e 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -2452,6 +2452,12 @@ mod tests { regex_match(col("c1"), lit("^(foo|bar)$")), col("c1").eq(lit("bar")).or(col("c1").eq(lit("foo"))), ); + assert_change( + regex_not_match(col("c1"), lit("^(foo|bar)$")), + col("c1") + .not_eq(lit("bar")) + .and(col("c1").not_eq(lit("foo"))), + ); assert_no_change(regex_match(col("c1"), lit("^foo|bar$"))); assert_no_change(regex_match(col("c1"), lit("^(foo)(bar)$"))); assert_no_change(regex_match(col("c1"), lit("^"))); diff --git a/datafusion/optimizer/src/simplify_expressions/regex.rs b/datafusion/optimizer/src/simplify_expressions/regex.rs index 977b6ff59c11..299e48d3ad6c 100644 --- a/datafusion/optimizer/src/simplify_expressions/regex.rs +++ b/datafusion/optimizer/src/simplify_expressions/regex.rs @@ -166,7 +166,7 @@ fn is_anchored_literal(v: &[Hir]) -> bool { } /// returns true if the elements in a `Concat` pattern are: -/// - `[Look::Start, Capture(Alternation), Look::End]` +/// - `[Look::Start, Capture(Alternation(Literals...)), Look::End]` fn is_anchored_capture(v: &[Hir]) -> bool { if 3 != v.len() { return false; @@ -212,7 +212,7 @@ fn anchored_literal_to_expr(v: &[Hir]) -> Option<Expr> { } } -fn anchored_alternation_to_expr(v: &[Hir]) -> Option<Vec<Expr>> { +fn anchored_alternation_to_exprs(v: &[Hir]) -> Option<Vec<Expr>> { if 3 != v.len() { return None; } @@ -220,22 +220,22 @@ fn anchored_alternation_to_expr(v: &[Hir]) -> Option<Vec<Expr>> { if let HirKind::Capture(cap, ..) = v[1].kind() { let Capture { sub, .. 
} = cap; if let HirKind::Alternation(alters) = sub.kind() { - let literals: Vec<_> = alters - .iter() - .map(|l| { - if let HirKind::Literal(l) = l.kind() { - str_from_literal(l).map(lit) - } else { - None + let mut literals = Vec::with_capacity(alters.len()); + for hir in alters { + let mut is_safe = false; + if let HirKind::Literal(l) = hir.kind() { + if let Some(safe_literal) = str_from_literal(l).map(lit) { + literals.push(safe_literal); + is_safe = true; } - }) - .collect(); + } - if literals.iter().any(|l| l.is_none()) { - return None; - }; + if !is_safe { + return None; + } + } - return Some(literals.into_iter().map(|v| v.unwrap()).collect()); + return Some(literals); } } @@ -252,14 +252,13 @@ fn lower_simple(mode: &OperatorMode, left: &Expr, hir: &Hir) -> Option<Expr> { return Some(mode.expr(Box::new(left.clone()), format!("%{s}%"))); } HirKind::Concat(inner) if is_anchored_literal(inner) => { - let right = anchored_literal_to_expr(inner)?; - return Some( - mode.expr_matches_literal(Box::new(left.clone()), Box::new(right)), - ); + return anchored_literal_to_expr(inner).map(|right| { + mode.expr_matches_literal(Box::new(left.clone()), Box::new(right)) + }); } HirKind::Concat(inner) if is_anchored_capture(inner) => { - let right = anchored_alternation_to_expr(inner)?; - return Some(left.clone().in_list(right, false)); + return anchored_alternation_to_exprs(inner) + .map(|right| left.clone().in_list(right, mode.not)); } HirKind::Concat(inner) => { if let Some(pattern) = collect_concat_to_like_string(inner) {