Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bug fix: stop all kinds of expressions from cnf-exploding #14585

Merged
merged 5 commits into from
Nov 23, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 32 additions & 32 deletions go/vt/sqlparser/predicate_rewriting.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,27 +20,19 @@ import (
"vitess.io/vitess/go/vt/log"
)

// This is the number of OR expressions in a predicate that will disable the CNF
// rewrite because we don't want to send large queries to MySQL
const CNFOrLimit = 5

// RewritePredicate walks the input AST and rewrites any boolean logic into a simpler form
// This simpler form is CNF plus logic for extracting predicates from OR, plus logic for turning ORs into IN
// Note: In order to re-plan, we need to empty the accumulated metadata in the AST,
// so ColName.Metadata will be nil:ed out as part of this rewrite
func RewritePredicate(ast SQLNode) SQLNode {
count := 0
_ = Walk(func(node SQLNode) (bool, error) {
if _, isExpr := node.(*OrExpr); isExpr {
count++
}

return true, nil
}, ast)
original := CloneSQLNode(ast)

allowCNF := count < CNFOrLimit
initialSize := size(ast)

for {
if size(ast) > initialSize*10 {
// the rewritten expression is growing too much.
// we'll abort here and return the original expression instead
return original
}
printExpr(ast)
exprChanged := false
stopOnChange := func(SQLNode, SQLNode) bool {
Expand All @@ -52,7 +44,7 @@ func RewritePredicate(ast SQLNode) SQLNode {
return true
}

rewritten, state := simplifyExpression(e, allowCNF)
rewritten, state := simplifyExpression(e)
if ch, isChange := state.(changed); isChange {
printRule(ch.rule, ch.exprMatched)
exprChanged = true
Expand All @@ -67,12 +59,23 @@ func RewritePredicate(ast SQLNode) SQLNode {
}
}

func simplifyExpression(expr Expr, allowCNF bool) (Expr, rewriteState) {
func size(expr SQLNode) (res int) {
_ = Walk(func(node SQLNode) (kontinue bool, err error) {
_, ok := node.(Expr)
if ok {
res++
}
return true, nil
}, expr)
return
}

func simplifyExpression(expr Expr) (Expr, rewriteState) {
switch expr := expr.(type) {
case *NotExpr:
return simplifyNot(expr)
case *OrExpr:
return simplifyOr(expr, allowCNF)
return simplifyOr(expr)
case *XorExpr:
return simplifyXor(expr)
case *AndExpr:
Expand Down Expand Up @@ -128,7 +131,7 @@ func ExtractINFromOR(expr *OrExpr) []Expr {
return uniquefy(ins)
}

func simplifyOr(expr *OrExpr, allowCNF bool) (Expr, rewriteState) {
func simplifyOr(expr *OrExpr) (Expr, rewriteState) {
or := expr

// first we search for ANDs and see how they can be simplified
Expand Down Expand Up @@ -166,11 +169,10 @@ func simplifyOr(expr *OrExpr, allowCNF bool) (Expr, rewriteState) {
return or.Right, newChange("(A AND B) OR A => A", f(expr))
}

if allowCNF {
// Distribution Law
return &AndExpr{Left: &OrExpr{Left: land.Left, Right: or.Right}, Right: &OrExpr{Left: land.Right, Right: or.Right}},
newChange("(A AND B) OR C => (A OR C) AND (B OR C)", f(expr))
}
// Distribution Law
return &AndExpr{Left: &OrExpr{Left: land.Left, Right: or.Right}, Right: &OrExpr{Left: land.Right, Right: or.Right}},
newChange("(A AND B) OR C => (A OR C) AND (B OR C)", f(expr))

}

// <> OR (<> AND <>)
Expand All @@ -180,14 +182,12 @@ func simplifyOr(expr *OrExpr, allowCNF bool) (Expr, rewriteState) {
return or.Left, newChange("A OR (A AND B) => A", f(expr))
}

if allowCNF {
// Distribution Law
return &AndExpr{
Left: &OrExpr{Left: or.Left, Right: rand.Left},
Right: &OrExpr{Left: or.Left, Right: rand.Right},
},
newChange("C OR (A AND B) => (C OR A) AND (C OR B)", f(expr))
}
// Distribution Law
return &AndExpr{
Left: &OrExpr{Left: or.Left, Right: rand.Left},
Right: &OrExpr{Left: or.Left, Right: rand.Right},
},
newChange("C OR (A AND B) => (C OR A) AND (C OR B)", f(expr))
}

// next, we want to try to turn multiple ORs into an IN when possible
Expand Down
7 changes: 5 additions & 2 deletions go/vt/sqlparser/predicate_rewriting_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ func TestSimplifyExpression(in *testing.T) {
expr, err := ParseExpr(tc.in)
require.NoError(t, err)

expr, didRewrite := simplifyExpression(expr, true)
expr, didRewrite := simplifyExpression(expr)
assert.True(t, didRewrite.changed())
assert.Equal(t, tc.expected, String(expr))
})
Expand Down Expand Up @@ -137,9 +137,12 @@ func TestRewritePredicate(in *testing.T) {
in: "(a = 1 and b = 41) or (a = 2 and b = 42) or (a = 3 and b = 43)",
expected: "a in (1, 2, 3) and (a in (1, 2) or b = 43) and ((a = 1 or b = 42 or a = 3) and (a = 1 or b = 42 or b = 43)) and ((b = 41 or a = 2 or a = 3) and (b = 41 or a = 2 or b = 43) and ((b in (41, 42) or a = 3) and b in (41, 42, 43)))",
}, {
// this has too many OR expressions in it, so we don't even try the CNF rewriting
// the following two tests show some pathological cases that would grow too much, and so we abort the rewriting
in: "a = 1 and b = 41 or a = 2 and b = 42 or a = 3 and b = 43 or a = 4 and b = 44 or a = 5 and b = 45 or a = 6 and b = 46",
expected: "a = 1 and b = 41 or a = 2 and b = 42 or a = 3 and b = 43 or a = 4 and b = 44 or a = 5 and b = 45 or a = 6 and b = 46",
}, {
in: "not n0 xor not (n2 and n3) xor (not n2 and (n1 xor n1) xor (n0 xor n0 xor n2))",
expected: "not n0 xor not (n2 and n3) xor (not n2 and (n1 xor n1) xor (n0 xor n0 xor n2))",
}}

for _, tc := range tests {
Expand Down
Loading